diff options
Diffstat (limited to 'drivers/net')
57 files changed, 6135 insertions, 1527 deletions
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 1aaa7a95ebc4..ae9e7f7cb31c 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -18,3 +18,13 @@ config NET_DSA_MV88E6XXX_GLOBAL2 It is required on most chips. If the chip you compile the support for doesn't have such registers set, say N here. In doubt, say Y. + +config NET_DSA_MV88E6XXX_PTP + bool "PTP support for Marvell 88E6xxx" + default n + depends on NET_DSA_MV88E6XXX_GLOBAL2 + imply NETWORK_PHY_TIMESTAMPING + imply PTP_1588_CLOCK + help + Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch + chips that support it. diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 58a4a0014e59..50de304abe2f 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -5,6 +5,10 @@ mv88e6xxx-objs += global1.o mv88e6xxx-objs += global1_atu.o mv88e6xxx-objs += global1_vtu.o mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_avb.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2_scratch.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += hwtstamp.o mv88e6xxx-objs += phy.o mv88e6xxx-objs += port.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_PTP) += ptp.o mv88e6xxx-objs += serdes.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index eb328bade225..39c7ad7e490f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -36,8 +36,10 @@ #include "chip.h" #include "global1.h" #include "global2.h" +#include "hwtstamp.h" #include "phy.h" #include "port.h" +#include "ptp.h" #include "serdes.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) @@ -712,9 +714,12 @@ static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port, for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) { stat = &mv88e6xxx_hw_stats[i]; if (stat->type & 
types) { + mutex_lock(&chip->reg_lock); data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port, bank1_select, histogram); + mutex_unlock(&chip->reg_lock); + j++; } } @@ -762,14 +767,13 @@ static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); ret = mv88e6xxx_stats_snapshot(chip, port); - if (ret < 0) { - mutex_unlock(&chip->reg_lock); + mutex_unlock(&chip->reg_lock); + + if (ret < 0) return; - } mv88e6xxx_get_stats(chip, port, data); - mutex_unlock(&chip->reg_lock); } static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip) @@ -1433,7 +1437,9 @@ static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip, eth_broadcast_addr(addr.mac); do { + mutex_lock(&chip->reg_lock); err = mv88e6xxx_g1_atu_getnext(chip, fid, &addr); + mutex_unlock(&chip->reg_lock); if (err) return err; @@ -1466,7 +1472,10 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, int err; /* Dump port's default Filtering Information Database (VLAN ID 0) */ + mutex_lock(&chip->reg_lock); err = mv88e6xxx_port_get_fid(chip, port, &fid); + mutex_unlock(&chip->reg_lock); + if (err) return err; @@ -1476,7 +1485,9 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, /* Dump VLANs' Filtering Information Databases */ do { + mutex_lock(&chip->reg_lock); err = mv88e6xxx_vtu_getnext(chip, &vlan); + mutex_unlock(&chip->reg_lock); if (err) return err; @@ -1496,13 +1507,8 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { struct mv88e6xxx_chip *chip = ds->priv; - int err; - - mutex_lock(&chip->reg_lock); - err = mv88e6xxx_port_db_dump(chip, port, cb, data); - mutex_unlock(&chip->reg_lock); - return err; + return mv88e6xxx_port_db_dump(chip, port, cb, data); } static int mv88e6xxx_bridge_map(struct mv88e6xxx_chip *chip, @@ -2092,6 +2098,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; + /* Setup PTP Hardware Clock and 
timestamping */ + if (chip->info->ptp_support) { + err = mv88e6xxx_ptp_setup(chip); + if (err) + goto unlock; + + err = mv88e6xxx_hwtstamp_setup(chip); + if (err) + goto unlock; + } + unlock: mutex_unlock(&chip->reg_lock); @@ -2472,6 +2489,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6161_ops = { @@ -2602,6 +2620,7 @@ static const struct mv88e6xxx_ops mv88e6172_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6175_ops = { @@ -2673,6 +2692,7 @@ static const struct mv88e6xxx_ops mv88e6176_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6185_ops = { @@ -2736,6 +2756,7 @@ static const struct mv88e6xxx_ops mv88e6190_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6190x_ops = { @@ -2771,6 +2792,7 @@ static const struct mv88e6xxx_ops mv88e6190x_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, }; static const struct mv88e6xxx_ops mv88e6191_ops = { @@ -2843,6 +2865,8 @@ static const struct mv88e6xxx_ops mv88e6240_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6290_ops = { @@ -2879,6 
+2903,8 @@ static const struct mv88e6xxx_ops mv88e6290_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_ops mv88e6320_ops = { @@ -2913,6 +2939,8 @@ static const struct mv88e6xxx_ops mv88e6320_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6321_ops = { @@ -2945,6 +2973,8 @@ static const struct mv88e6xxx_ops mv88e6321_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6185_g1_vtu_getnext, .vtu_loadpurge = mv88e6185_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6341_ops = { @@ -2981,6 +3011,8 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_ops mv88e6350_ops = { @@ -3049,6 +3081,7 @@ static const struct mv88e6xxx_ops mv88e6351_ops = { .reset = mv88e6352_g1_reset, .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6352_ops = { @@ -3086,6 +3119,8 @@ static const struct mv88e6xxx_ops mv88e6352_ops = { .vtu_getnext = mv88e6352_g1_vtu_getnext, .vtu_loadpurge = mv88e6352_g1_vtu_loadpurge, .serdes_power = mv88e6352_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6352_avb_ops, }; static const struct mv88e6xxx_ops mv88e6390_ops = { @@ -3124,6 +3159,8 @@ static const struct mv88e6xxx_ops mv88e6390_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = 
mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_ops mv88e6390x_ops = { @@ -3162,6 +3199,8 @@ static const struct mv88e6xxx_ops mv88e6390x_ops = { .vtu_getnext = mv88e6390_g1_vtu_getnext, .vtu_loadpurge = mv88e6390_g1_vtu_loadpurge, .serdes_power = mv88e6390_serdes_power, + .gpio_ops = &mv88e6352_gpio_ops, + .avb_ops = &mv88e6390_avb_ops, }; static const struct mv88e6xxx_info mv88e6xxx_table[] = { @@ -3267,6 +3306,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6341", .num_databases = 4096, .num_ports = 6, + .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3346,6 +3386,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6172", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3386,6 +3427,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6176", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3424,6 +3466,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6190", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3444,6 +3487,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6190X", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3475,6 +3519,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6191_ops, }, @@ -3484,6 +3529,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6240", .num_databases 
= 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3495,6 +3541,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6240_ops, }, @@ -3504,6 +3551,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6290", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3515,6 +3563,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6290_ops, }, @@ -3524,6 +3573,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6320", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3534,6 +3584,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6320_ops, }, @@ -3543,6 +3594,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6321", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3552,6 +3604,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .atu_move_port_mask = 0xf, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6321_ops, }, @@ -3561,6 +3614,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6341", .num_databases = 4096, .num_ports = 6, + .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3571,6 +3625,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, 
.ops = &mv88e6341_ops, }, @@ -3620,6 +3675,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6352", .num_databases = 4096, .num_ports = 7, + .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, .global1_addr = 0x1b, @@ -3631,6 +3687,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_EDSA, + .ptp_support = true, .ops = &mv88e6352_ops, }, [MV88E6390] = { @@ -3639,6 +3696,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6390", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3650,6 +3708,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6390_ops, }, [MV88E6390X] = { @@ -3658,6 +3717,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .name = "Marvell 88E6390X", .num_databases = 4096, .num_ports = 11, /* 10 + Z80 */ + .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, .global1_addr = 0x1b, @@ -3669,6 +3729,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .pvt = true, .multi_chip = true, .tag_protocol = DSA_TAG_PROTO_DSA, + .ptp_support = true, .ops = &mv88e6390x_ops, }, }; @@ -3880,6 +3941,11 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_mdb_del = mv88e6xxx_port_mdb_del, .crosschip_bridge_join = mv88e6xxx_crosschip_bridge_join, .crosschip_bridge_leave = mv88e6xxx_crosschip_bridge_leave, + .port_hwtstamp_set = mv88e6xxx_port_hwtstamp_set, + .port_hwtstamp_get = mv88e6xxx_port_hwtstamp_get, + .port_txtstamp = mv88e6xxx_port_txtstamp, + .port_rxtstamp = mv88e6xxx_port_rxtstamp, + .get_ts_info = mv88e6xxx_get_ts_info, }; static struct dsa_switch_driver mv88e6xxx_switch_drv = { @@ -4022,6 +4088,11 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev) struct dsa_switch *ds = 
dev_get_drvdata(&mdiodev->dev); struct mv88e6xxx_chip *chip = ds->priv; + if (chip->info->ptp_support) { + mv88e6xxx_hwtstamp_free(chip); + mv88e6xxx_ptp_free(chip); + } + mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); mv88e6xxx_mdios_unregister(chip); diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 3dba6e90adcf..97d7915f32c7 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -16,6 +16,8 @@ #include <linux/irq.h> #include <linux/gpio/consumer.h> #include <linux/phy.h> +#include <linux/ptp_clock_kernel.h> +#include <linux/timecounter.h> #include <net/dsa.h> #ifndef UINT64_MAX @@ -39,6 +41,8 @@ #define MV88E6XXX_MAX_PVT_SWITCHES 32 #define MV88E6XXX_MAX_PVT_PORTS 16 +#define MV88E6XXX_MAX_GPIO 16 + enum mv88e6xxx_egress_mode { MV88E6XXX_EGRESS_MODE_UNMODIFIED, MV88E6XXX_EGRESS_MODE_UNTAGGED, @@ -105,6 +109,7 @@ struct mv88e6xxx_info { const char *name; unsigned int num_databases; unsigned int num_ports; + unsigned int num_gpio; unsigned int max_vid; unsigned int port_base_addr; unsigned int global1_addr; @@ -126,6 +131,9 @@ struct mv88e6xxx_info { */ u8 atu_move_port_mask; const struct mv88e6xxx_ops *ops; + + /* Supports PTP */ + bool ptp_support; }; struct mv88e6xxx_atu_entry { @@ -146,6 +154,8 @@ struct mv88e6xxx_vtu_entry { struct mv88e6xxx_bus_ops; struct mv88e6xxx_irq_ops; +struct mv88e6xxx_gpio_ops; +struct mv88e6xxx_avb_ops; struct mv88e6xxx_irq { u16 masked; @@ -154,6 +164,32 @@ struct mv88e6xxx_irq { unsigned int nirqs; }; +/* state flags for mv88e6xxx_port_hwtstamp::state */ +enum { + MV88E6XXX_HWTSTAMP_ENABLED, + MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, +}; + +struct mv88e6xxx_port_hwtstamp { + /* Port index */ + int port_id; + + /* Timestamping state */ + unsigned long state; + + /* Resources for receive timestamping */ + struct sk_buff_head rx_queue; + struct sk_buff_head rx_queue2; + + /* Resources for transmit timestamping */ + unsigned long tx_tstamp_start; + struct 
sk_buff *tx_skb; + u16 tx_seq_id; + + /* Current timestamp configuration */ + struct hwtstamp_config tstamp_config; +}; + struct mv88e6xxx_chip { const struct mv88e6xxx_info *info; @@ -209,6 +245,26 @@ struct mv88e6xxx_chip { int watchdog_irq; int atu_prob_irq; int vtu_prob_irq; + + /* GPIO resources */ + u8 gpio_data[2]; + + /* This cyclecounter abstracts the switch PTP time. + * reg_lock must be held for any operation that read()s. + */ + struct cyclecounter tstamp_cc; + struct timecounter tstamp_tc; + struct delayed_work overflow_work; + + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_clock_info; + struct delayed_work tai_event_work; + struct ptp_pin_desc pin_config[MV88E6XXX_MAX_GPIO]; + u16 trig_config; + u16 evcap_config; + + /* Per-port timestamping resources. */ + struct mv88e6xxx_port_hwtstamp port_hwtstamp[DSA_MAX_PORTS]; }; struct mv88e6xxx_bus_ops { @@ -344,6 +400,12 @@ struct mv88e6xxx_ops { struct mv88e6xxx_vtu_entry *entry); int (*vtu_loadpurge)(struct mv88e6xxx_chip *chip, struct mv88e6xxx_vtu_entry *entry); + + /* GPIO operations */ + const struct mv88e6xxx_gpio_ops *gpio_ops; + + /* Interface to the AVB/PTP registers */ + const struct mv88e6xxx_avb_ops *avb_ops; }; struct mv88e6xxx_irq_ops { @@ -355,6 +417,42 @@ struct mv88e6xxx_irq_ops { void (*irq_free)(struct mv88e6xxx_chip *chip); }; +struct mv88e6xxx_gpio_ops { + /* Get/set data on GPIO pin */ + int (*get_data)(struct mv88e6xxx_chip *chip, unsigned int pin); + int (*set_data)(struct mv88e6xxx_chip *chip, unsigned int pin, + int value); + + /* get/set GPIO direction */ + int (*get_dir)(struct mv88e6xxx_chip *chip, unsigned int pin); + int (*set_dir)(struct mv88e6xxx_chip *chip, unsigned int pin, + bool input); + + /* get/set GPIO pin control */ + int (*get_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin, + int *func); + int (*set_pctl)(struct mv88e6xxx_chip *chip, unsigned int pin, + int func); +}; + +struct mv88e6xxx_avb_ops { + /* Access port-scoped Precision Time Protocol 
registers */ + int (*port_ptp_read)(struct mv88e6xxx_chip *chip, int port, int addr, + u16 *data, int len); + int (*port_ptp_write)(struct mv88e6xxx_chip *chip, int port, int addr, + u16 data); + + /* Access global Precision Time Protocol registers */ + int (*ptp_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data, + int len); + int (*ptp_write)(struct mv88e6xxx_chip *chip, int addr, u16 data); + + /* Access global Time Application Interface registers */ + int (*tai_read)(struct mv88e6xxx_chip *chip, int addr, u16 *data, + int len); + int (*tai_write)(struct mv88e6xxx_chip *chip, int addr, u16 data); +}; + #define STATS_TYPE_PORT BIT(0) #define STATS_TYPE_BANK0 BIT(1) #define STATS_TYPE_BANK1 BIT(2) @@ -386,6 +484,11 @@ static inline u16 mv88e6xxx_port_mask(struct mv88e6xxx_chip *chip) return GENMASK(mv88e6xxx_num_ports(chip) - 1, 0); } +static inline unsigned int mv88e6xxx_num_gpio(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_gpio; +} + int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index af0727877825..5f370f1fc7c4 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -20,22 +20,22 @@ #include "global1.h" /* for MV88E6XXX_G1_STS_IRQ_DEVICE */ #include "global2.h" -static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) { return mv88e6xxx_read(chip, chip->info->global2_addr, reg, val); } -static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) { return mv88e6xxx_write(chip, chip->info->global2_addr, reg, val); } -static int mv88e6xxx_g2_update(struct mv88e6xxx_chip 
*chip, int reg, u16 update) +int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) { return mv88e6xxx_update(chip, chip->info->global2_addr, reg, update); } -static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) { return mv88e6xxx_wait(chip, chip->info->global2_addr, reg, mask); } @@ -798,6 +798,7 @@ int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, struct mii_bus *bus, val); } +/* Offset 0x1B: Watchdog Control */ static int mv88e6097_watchdog_action(struct mv88e6xxx_chip *chip, int irq) { u16 reg; diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h index 669f59017b12..25f92b3d7157 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.h +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -149,7 +149,26 @@ #define MV88E6390_G2_EEPROM_ADDR_MASK 0xffff /* Offset 0x16: AVB Command Register */ -#define MV88E6352_G2_AVB_CMD 0x16 +#define MV88E6352_G2_AVB_CMD 0x16 +#define MV88E6352_G2_AVB_CMD_BUSY 0x8000 +#define MV88E6352_G2_AVB_CMD_OP_READ 0x4000 +#define MV88E6352_G2_AVB_CMD_OP_READ_INCR 0x6000 +#define MV88E6352_G2_AVB_CMD_OP_WRITE 0x3000 +#define MV88E6390_G2_AVB_CMD_OP_READ 0x0000 +#define MV88E6390_G2_AVB_CMD_OP_READ_INCR 0x4000 +#define MV88E6390_G2_AVB_CMD_OP_WRITE 0x6000 +#define MV88E6352_G2_AVB_CMD_PORT_MASK 0x0f00 +#define MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL 0xe +#define MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL 0xf +#define MV88E6390_G2_AVB_CMD_PORT_MASK 0x1f00 +#define MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL 0x1e +#define MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL 0x1f +#define MV88E6352_G2_AVB_CMD_BLOCK_PTP 0 +#define MV88E6352_G2_AVB_CMD_BLOCK_AVB 1 +#define MV88E6352_G2_AVB_CMD_BLOCK_QAV 2 +#define MV88E6352_G2_AVB_CMD_BLOCK_QVB 3 +#define MV88E6352_G2_AVB_CMD_BLOCK_MASK 0x00e0 +#define MV88E6352_G2_AVB_CMD_ADDR_MASK 0x001f /* Offset 0x17: AVB Data Register */ #define MV88E6352_G2_AVB_DATA 0x17 @@ -223,6 +242,35 @@ #define 
MV88E6352_G2_NOEGR_POLICY 0x2000 #define MV88E6390_G2_LAG_ID_4 0x2000 +/* Scratch/Misc registers accessed through MV88E6XXX_G2_SCRATCH_MISC */ +/* Offset 0x02: Misc Configuration */ +#define MV88E6352_G2_SCRATCH_MISC_CFG 0x02 +#define MV88E6352_G2_SCRATCH_MISC_CFG_NORMALSMI 0x80 +/* Offset 0x60-0x61: GPIO Configuration */ +#define MV88E6352_G2_SCRATCH_GPIO_CFG0 0x60 +#define MV88E6352_G2_SCRATCH_GPIO_CFG1 0x61 +/* Offset 0x62-0x63: GPIO Direction */ +#define MV88E6352_G2_SCRATCH_GPIO_DIR0 0x62 +#define MV88E6352_G2_SCRATCH_GPIO_DIR1 0x63 +#define MV88E6352_G2_SCRATCH_GPIO_DIR_OUT 0 +#define MV88E6352_G2_SCRATCH_GPIO_DIR_IN 1 +/* Offset 0x64-0x65: GPIO Data */ +#define MV88E6352_G2_SCRATCH_GPIO_DATA0 0x64 +#define MV88E6352_G2_SCRATCH_GPIO_DATA1 0x65 +/* Offset 0x68-0x6F: GPIO Pin Control */ +#define MV88E6352_G2_SCRATCH_GPIO_PCTL0 0x68 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL1 0x69 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL2 0x6A +#define MV88E6352_G2_SCRATCH_GPIO_PCTL3 0x6B +#define MV88E6352_G2_SCRATCH_GPIO_PCTL4 0x6C +#define MV88E6352_G2_SCRATCH_GPIO_PCTL5 0x6D +#define MV88E6352_G2_SCRATCH_GPIO_PCTL6 0x6E +#define MV88E6352_G2_SCRATCH_GPIO_PCTL7 0x6F + +#define MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO 0 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL_TRIG 1 +#define MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ 2 + #ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) @@ -230,6 +278,11 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) return 0; } +int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val); +int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val); +int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update); +int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask); + int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); int mv88e6390_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port); @@ -267,6 +320,11 @@ int 
mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip); extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops; extern const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops; +extern const struct mv88e6xxx_avb_ops mv88e6352_avb_ops; +extern const struct mv88e6xxx_avb_ops mv88e6390_avb_ops; + +extern const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops; + #else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) @@ -279,6 +337,26 @@ static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) return 0; } +static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return -EOPNOTSUPP; +} + +static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return -EOPNOTSUPP; +} + +static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) +{ + return -EOPNOTSUPP; +} + +static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return -EOPNOTSUPP; +} + static inline int mv88e6352_g2_irl_init_all(struct mv88e6xxx_chip *chip, int port) { @@ -382,6 +460,11 @@ static inline int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip) static const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops = {}; static const struct mv88e6xxx_irq_ops mv88e6390_watchdog_ops = {}; +static const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = {}; +static const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = {}; + +static const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = {}; + #endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ #endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/global2_avb.c b/drivers/net/dsa/mv88e6xxx/global2_avb.c new file mode 100644 index 000000000000..2e398ccb88ca --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2_avb.c @@ -0,0 +1,193 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016-2017 Savoir-faire Linux Inc. 
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * Copyright (c) 2017 National Instruments + * Brandon Streiff <brandon.streiff@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "global2.h" + +/* Offset 0x16: AVB Command Register + * Offset 0x17: AVB Data Register + * + * There are two different versions of this register interface: + * "6352": 3-bit "op" field, 4-bit "port" field. + * "6390": 2-bit "op" field, 5-bit "port" field. + * + * The "op" codes are different between the two, as well as the special + * port fields for global PTP and TAI configuration. + */ + +/* mv88e6xxx_g2_avb_read -- Read one or multiple 16-bit words. + * The hardware supports snapshotting up to four contiguous registers. + */ +static int mv88e6xxx_g2_avb_read(struct mv88e6xxx_chip *chip, u16 readop, + u16 *data, int len) +{ + int err; + int i; + + /* Hardware can only snapshot four words. */ + if (len > 4) + return -E2BIG; + + err = mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, readop); + if (err) + return err; + + for (i = 0; i < len; ++i) { + err = mv88e6xxx_g2_read(chip, MV88E6352_G2_AVB_DATA, + &data[i]); + if (err) + return err; + } + + return 0; +} + +/* mv88e6xxx_g2_avb_write -- Write one 16-bit word. */ +static int mv88e6xxx_g2_avb_write(struct mv88e6xxx_chip *chip, u16 writeop, + u16 data) +{ + int err; + + err = mv88e6xxx_g2_write(chip, MV88E6352_G2_AVB_DATA, data); + if (err) + return err; + + return mv88e6xxx_g2_update(chip, MV88E6352_G2_AVB_CMD, writeop); +} + +static int mv88e6352_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip, + int port, int addr, u16 *data, + int len) +{ + u16 readop = (len == 1 ? 
MV88E6352_G2_AVB_CMD_OP_READ : + MV88E6352_G2_AVB_CMD_OP_READ_INCR) | + (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | + addr; + + return mv88e6xxx_g2_avb_read(chip, readop, data, len); +} + +static int mv88e6352_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip, + int port, int addr, u16 data) +{ + u16 writeop = MV88E6352_G2_AVB_CMD_OP_WRITE | (port << 8) | + (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr; + + return mv88e6xxx_g2_avb_write(chip, writeop, data); +} + +static int mv88e6352_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6352_g2_avb_port_ptp_read(chip, + MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data, len); +} + +static int mv88e6352_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6352_g2_avb_port_ptp_write(chip, + MV88E6352_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data); +} + +static int mv88e6352_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6352_g2_avb_port_ptp_read(chip, + MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data, len); +} + +static int mv88e6352_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6352_g2_avb_port_ptp_write(chip, + MV88E6352_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data); +} + +const struct mv88e6xxx_avb_ops mv88e6352_avb_ops = { + .port_ptp_read = mv88e6352_g2_avb_port_ptp_read, + .port_ptp_write = mv88e6352_g2_avb_port_ptp_write, + .ptp_read = mv88e6352_g2_avb_ptp_read, + .ptp_write = mv88e6352_g2_avb_ptp_write, + .tai_read = mv88e6352_g2_avb_tai_read, + .tai_write = mv88e6352_g2_avb_tai_write, +}; + +static int mv88e6390_g2_avb_port_ptp_read(struct mv88e6xxx_chip *chip, + int port, int addr, u16 *data, + int len) +{ + u16 readop = (len == 1 ? 
MV88E6390_G2_AVB_CMD_OP_READ : + MV88E6390_G2_AVB_CMD_OP_READ_INCR) | + (port << 8) | (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | + addr; + + return mv88e6xxx_g2_avb_read(chip, readop, data, len); +} + +static int mv88e6390_g2_avb_port_ptp_write(struct mv88e6xxx_chip *chip, + int port, int addr, u16 data) +{ + u16 writeop = MV88E6390_G2_AVB_CMD_OP_WRITE | (port << 8) | + (MV88E6352_G2_AVB_CMD_BLOCK_PTP << 5) | addr; + + return mv88e6xxx_g2_avb_write(chip, writeop, data); +} + +static int mv88e6390_g2_avb_ptp_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6390_g2_avb_port_ptp_read(chip, + MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data, len); +} + +static int mv88e6390_g2_avb_ptp_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6390_g2_avb_port_ptp_write(chip, + MV88E6390_G2_AVB_CMD_PORT_PTPGLOBAL, + addr, data); +} + +static int mv88e6390_g2_avb_tai_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + return mv88e6390_g2_avb_port_ptp_read(chip, + MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data, len); +} + +static int mv88e6390_g2_avb_tai_write(struct mv88e6xxx_chip *chip, int addr, + u16 data) +{ + return mv88e6390_g2_avb_port_ptp_write(chip, + MV88E6390_G2_AVB_CMD_PORT_TAIGLOBAL, + addr, data); +} + +const struct mv88e6xxx_avb_ops mv88e6390_avb_ops = { + .port_ptp_read = mv88e6390_g2_avb_port_ptp_read, + .port_ptp_write = mv88e6390_g2_avb_port_ptp_write, + .ptp_read = mv88e6390_g2_avb_ptp_read, + .ptp_write = mv88e6390_g2_avb_ptp_write, + .tai_read = mv88e6390_g2_avb_tai_read, + .tai_write = mv88e6390_g2_avb_tai_write, +}; diff --git a/drivers/net/dsa/mv88e6xxx/global2_scratch.c b/drivers/net/dsa/mv88e6xxx/global2_scratch.c new file mode 100644 index 000000000000..0ff12bff9f0e --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2_scratch.c @@ -0,0 +1,240 @@ +/* + * Marvell 88E6xxx Switch Global 2 Scratch & Misc Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * 
Copyright (c) 2017 National Instruments + * Brandon Streiff <brandon.streiff@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "chip.h" +#include "global2.h" + +/* Offset 0x1A: Scratch and Misc. Register */ +static int mv88e6xxx_g2_scratch_read(struct mv88e6xxx_chip *chip, int reg, + u8 *data) +{ + u16 value; + int err; + + err = mv88e6xxx_g2_write(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, + reg << 8); + if (err) + return err; + + err = mv88e6xxx_g2_read(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, &value); + if (err) + return err; + + *data = (value & MV88E6XXX_G2_SCRATCH_MISC_DATA_MASK); + + return 0; +} + +static int mv88e6xxx_g2_scratch_write(struct mv88e6xxx_chip *chip, int reg, + u8 data) +{ + u16 value = (reg << 8) | data; + + return mv88e6xxx_g2_update(chip, MV88E6XXX_G2_SCRATCH_MISC_MISC, value); +} + +/** + * mv88e6xxx_g2_scratch_get_bit - get a bit + * @chip: chip private data + * @base_reg: first scratch register of the bit bank + * @offset: bit index relative to @base_reg (8 bits per register) + * @set: is bit set? + */ +static int mv88e6xxx_g2_scratch_get_bit(struct mv88e6xxx_chip *chip, + int base_reg, unsigned int offset, + int *set) +{ + int reg = base_reg + (offset / 8); + u8 mask = (1 << (offset & 0x7)); + u8 val; + int err; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + *set = !!(mask & val); + + return 0; +} + +/** + * mv88e6xxx_g2_scratch_set_bit - set (or clear) a bit + * @chip: chip private data + * @base_reg: first scratch register of the bit bank + * @offset: bit index relative to @base_reg (8 bits per register) + * @set: set if true, clear if false + * + * Helper function for dealing with the direction and data registers. 
+ */
+static int mv88e6xxx_g2_scratch_set_bit(struct mv88e6xxx_chip *chip,
+					int base_reg, unsigned int offset,
+					int set)
+{
+	int reg = base_reg + (offset / 8);
+	u8 mask = (1 << (offset & 0x7));
+	u8 val;
+	int err;
+
+	/* Read-modify-write of the one register holding the bit. */
+	err = mv88e6xxx_g2_scratch_read(chip, reg, &val);
+	if (err)
+		return err;
+
+	if (set)
+		val |= mask;
+	else
+		val &= ~mask;
+
+	return mv88e6xxx_g2_scratch_write(chip, reg, val);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_data - get data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for low, 1 for high, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_get_data(struct mv88e6xxx_chip *chip,
+					      unsigned int pin)
+{
+	int val = 0;
+	int err;
+
+	err = mv88e6xxx_g2_scratch_get_bit(chip,
+					   MV88E6352_G2_SCRATCH_GPIO_DATA0,
+					   pin, &val);
+	if (err)
+		return err;
+
+	return val;
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_set_data - set data on gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ * @value: value to set
+ *
+ * The new register value is built from the copy cached in chip->gpio_data
+ * rather than from a read-back of the data register.
+ *
+ * Return: 0 on success, negative error
+ */
+static int mv88e6352_g2_scratch_gpio_set_data(struct mv88e6xxx_chip *chip,
+					      unsigned int pin, int value)
+{
+	u8 mask = (1 << (pin & 0x7));
+	int offset = (pin / 8);
+	int reg;
+
+	reg = MV88E6352_G2_SCRATCH_GPIO_DATA0 + offset;
+
+	if (value)
+		chip->gpio_data[offset] |= mask;
+	else
+		chip->gpio_data[offset] &= ~mask;
+
+	return mv88e6xxx_g2_scratch_write(chip, reg, chip->gpio_data[offset]);
+}
+
+/**
+ * mv88e6352_g2_scratch_gpio_get_dir - get direction of gpio pin
+ * @chip: chip private data
+ * @pin: gpio index
+ *
+ * Return: 0 for output, 1 for input (same as GPIOF_DIR_XXX).
+ */ +static int mv88e6352_g2_scratch_gpio_get_dir(struct mv88e6xxx_chip *chip, + unsigned int pin) +{ + int val = 0; + int err; + + err = mv88e6xxx_g2_scratch_get_bit(chip, + MV88E6352_G2_SCRATCH_GPIO_DIR0, + pin, &val); + if (err) + return err; + + return val; +} + +/** + * mv88e6352_g2_scratch_gpio_set_dir - set direction of gpio pin + * @chip: chip private data + * @pin: gpio index + */ +static int mv88e6352_g2_scratch_gpio_set_dir(struct mv88e6xxx_chip *chip, + unsigned int pin, bool input) +{ + int value = (input ? MV88E6352_G2_SCRATCH_GPIO_DIR_IN : + MV88E6352_G2_SCRATCH_GPIO_DIR_OUT); + + return mv88e6xxx_g2_scratch_set_bit(chip, + MV88E6352_G2_SCRATCH_GPIO_DIR0, + pin, value); +} + +/** + * mv88e6352_g2_scratch_gpio_get_pctl - get pin control setting + * @chip: chip private data + * @pin: gpio index + * @func: function number + * + * Note that the function numbers themselves may vary by chipset. + */ +static int mv88e6352_g2_scratch_gpio_get_pctl(struct mv88e6xxx_chip *chip, + unsigned int pin, int *func) +{ + int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2); + int offset = (pin & 0x1) ? 4 : 0; + u8 mask = (0x7 << offset); + int err; + u8 val; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + *func = (val & mask) >> offset; + + return 0; +} + +/** + * mv88e6352_g2_scratch_gpio_set_pctl - set pin control setting + * @chip: chip private data + * @pin: gpio index + * @func: function number + */ +static int mv88e6352_g2_scratch_gpio_set_pctl(struct mv88e6xxx_chip *chip, + unsigned int pin, int func) +{ + int reg = MV88E6352_G2_SCRATCH_GPIO_PCTL0 + (pin / 2); + int offset = (pin & 0x1) ? 
4 : 0; + u8 mask = (0x7 << offset); + int err; + u8 val; + + err = mv88e6xxx_g2_scratch_read(chip, reg, &val); + if (err) + return err; + + val = (val & ~mask) | ((func & mask) << offset); + + return mv88e6xxx_g2_scratch_write(chip, reg, val); +} + +const struct mv88e6xxx_gpio_ops mv88e6352_gpio_ops = { + .get_data = mv88e6352_g2_scratch_gpio_get_data, + .set_data = mv88e6352_g2_scratch_gpio_set_data, + .get_dir = mv88e6352_g2_scratch_gpio_get_dir, + .set_dir = mv88e6352_g2_scratch_gpio_set_dir, + .get_pctl = mv88e6352_g2_scratch_gpio_get_pctl, + .set_pctl = mv88e6352_g2_scratch_gpio_set_pctl, +}; diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.c b/drivers/net/dsa/mv88e6xxx/hwtstamp.c new file mode 100644 index 000000000000..ac7694c71266 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.c @@ -0,0 +1,576 @@ +/* + * Marvell 88E6xxx Switch hardware timestamping support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2017 National Instruments + * Erik Hons <erik.hons@ni.com> + * Brandon Streiff <brandon.streiff@ni.com> + * Dane Wagner <dane.wagner@ni.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "hwtstamp.h"
+#include "ptp.h"
+#include <linux/ptp_classify.h>
+
+/* The PTP class "type" of a queued skb is kept in the first bytes of its
+ * control buffer (skb->cb) so the worker can re-parse the header later.
+ */
+#define SKB_PTP_TYPE(__skb) (*(unsigned int *)((__skb)->cb))
+
+/* Thin dispatchers to the chip's AVB accessors.  Each returns -EOPNOTSUPP
+ * when the corresponding op is not provided.
+ * NOTE(review): avb_ops itself is dereferenced without a NULL check —
+ * confirm that callers only run on chips with ptp_support.
+ */
+static int mv88e6xxx_port_ptp_read(struct mv88e6xxx_chip *chip, int port,
+				   int addr, u16 *data, int len)
+{
+	if (!chip->info->ops->avb_ops->port_ptp_read)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->port_ptp_read(chip, port, addr,
+						       data, len);
+}
+
+static int mv88e6xxx_port_ptp_write(struct mv88e6xxx_chip *chip, int port,
+				    int addr, u16 data)
+{
+	if (!chip->info->ops->avb_ops->port_ptp_write)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->port_ptp_write(chip, port, addr,
+							data);
+}
+
+static int mv88e6xxx_ptp_write(struct mv88e6xxx_chip *chip, int addr,
+			       u16 data)
+{
+	if (!chip->info->ops->avb_ops->ptp_write)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->avb_ops->ptp_write(chip, addr, data);
+}
+
+/* TX_TSTAMP_TIMEOUT: This limits the time spent polling for a TX
+ * timestamp. When working properly, hardware will produce a timestamp
+ * within 1ms. Software may encounter delays due to MDIO contention, so
+ * the timeout is set accordingly.
+ */
+#define TX_TSTAMP_TIMEOUT	msecs_to_jiffies(20)
+
+/* Report timestamping capabilities for @port.  Fails with -EOPNOTSUPP on
+ * chips without ptp_support; otherwise advertises hardware TX/RX
+ * timestamping, the PHC index of chip->ptp_clock, and the PTPv2 RX filters.
+ */
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+			  struct ethtool_ts_info *info)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+
+	if (!chip->info->ptp_support)
+		return -EOPNOTSUPP;
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->phc_index = ptp_clock_index(chip->ptp_clock);
+	info->tx_types =
+		(1 << HWTSTAMP_TX_OFF) |
+		(1 << HWTSTAMP_TX_ON);
+	info->rx_filters =
+		(1 << HWTSTAMP_FILTER_NONE) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_SYNC) |
+		(1 << HWTSTAMP_FILTER_PTP_V2_DELAY_REQ);
+
+	return 0;
+}
+
+/* Validate @config and program the per-port PTP enable accordingly.
+ *
+ * Every accepted PTPv2 RX filter is widened to HWTSTAMP_FILTER_PTP_V2_EVENT
+ * in @config.  Timestamping ends up enabled only for tx_type == TX_ON
+ * combined with a PTPv2 filter; FILTER_NONE always disables it.
+ *
+ * Return: 0 on success, -EINVAL for non-zero flags, -ERANGE for an
+ * unsupported tx_type or rx_filter, or the register write error.
+ * NOTE(review): the ENABLED bit is cleared before validation, so it stays
+ * cleared when this returns an error.
+ */
+static int mv88e6xxx_set_hwtstamp_config(struct mv88e6xxx_chip *chip, int port,
+					 struct hwtstamp_config *config)
+{
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	bool tstamp_enable = false;
+	u16 port_config0;
+	int err;
+
+	/* Prevent the TX/RX paths from trying to interact with the
+	 * timestamp hardware while we reconfigure it.
+	 */
+	clear_bit_unlock(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+	/* reserved for future extensions */
+	if (config->flags)
+		return -EINVAL;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tstamp_enable = false;
+		break;
+	case HWTSTAMP_TX_ON:
+		tstamp_enable = true;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* The switch supports timestamping both L2 and L4; one cannot be
+	 * disabled independently of the other.
+	 */
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tstamp_enable = false;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	default:
+		config->rx_filter = HWTSTAMP_FILTER_NONE;
+		return -ERANGE;
+	}
+
+	if (tstamp_enable) {
+		/* Disable transportSpecific value matching, so that packets
+		 * with either 1588 (0) and 802.1AS (1) will be timestamped.
+		 */
+		port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH;
+	} else {
+		/* Disable PTP. This disables both RX and TX timestamping. */
+		port_config0 = MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP;
+	}
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+				       port_config0);
+	mutex_unlock(&chip->reg_lock);
+
+	if (err < 0)
+		return err;
+
+	/* Once hardware has been configured, enable timestamp checks
+	 * in the RX/TX paths.
+	 */
+	if (tstamp_enable)
+		set_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state);
+
+	return 0;
+}
+
+/* Handler for setting the hardware timestamping config: copy the config
+ * from user space, apply it, remember it in ps->tstamp_config, and copy the
+ * (possibly widened) config back to user space.
+ */
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+				struct ifreq *ifr)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	struct hwtstamp_config config;
+	int err;
+
+	if (!chip->info->ptp_support)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = mv88e6xxx_set_hwtstamp_config(chip, port, &config);
+	if (err)
+		return err;
+
+	/* Save the chosen configuration to be returned later. */
+	memcpy(&ps->tstamp_config, &config, sizeof(config));
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/* Handler for reading the config back: returns the config last saved by
+ * mv88e6xxx_port_hwtstamp_set() for this port.
+ */
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+				struct ifreq *ifr)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	struct hwtstamp_config *config = &ps->tstamp_config;
+
+	if (!chip->info->ptp_support)
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/* Get the start of the PTP header in this skb */
+static u8 *parse_ptp_header(struct sk_buff *skb, unsigned int type)
+{
+	u8 *data = skb_mac_header(skb);
+	unsigned int offset = 0;
+
+	if (type & PTP_CLASS_VLAN)
+		offset += VLAN_HLEN;
+
+	switch (type & PTP_CLASS_PMASK) {
+	case PTP_CLASS_IPV4:
+		offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+		break;
+	case PTP_CLASS_IPV6:
+		offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+		break;
+	case PTP_CLASS_L2:
+		offset += ETH_HLEN;
+		break;
+	default:
+		return NULL;
+	}
+
+	/* Ensure that the entire header is present in this packet.
+	 * NOTE(review): 34 is presumably the PTP common header length —
+	 * confirm and consider a named constant.
+	 */
+	if (skb->len + ETH_HLEN < offset + 34)
+		return NULL;
+
+	return data + offset;
+}
+
+/* Returns a pointer to the PTP header if the caller should time stamp,
+ * or NULL if the caller should not.
+ */
+static u8 *mv88e6xxx_should_tstamp(struct mv88e6xxx_chip *chip, int port,
+				   struct sk_buff *skb, unsigned int type)
+{
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	u8 *hdr;
+
+	if (!chip->info->ptp_support)
+		return NULL;
+
+	hdr = parse_ptp_header(skb, type);
+	if (!hdr)
+		return NULL;
+
+	if (!test_bit(MV88E6XXX_HWTSTAMP_ENABLED, &ps->state))
+		return NULL;
+
+	return hdr;
+}
+
+/* A captured timestamp is usable only when the VALID bit is set and no
+ * status bit (MV88E6XXX_PTP_TS_STATUS_MASK) is raised.
+ */
+static int mv88e6xxx_ts_valid(u16 status)
+{
+	if (!(status & MV88E6XXX_PTP_TS_VALID))
+		return 0;
+	if (status & MV88E6XXX_PTP_TS_STATUS_MASK)
+		return 0;
+	return 1;
+}
+
+/* Compare the sequence ID in the skb's PTP header with @ts_seqid.
+ * NOTE(review): the parse_ptp_header() result is not NULL-checked here; the
+ * RX path only queues skbs that already parsed successfully.
+ */
+static int seq_match(struct sk_buff *skb, u16 ts_seqid)
+{
+	unsigned int type = SKB_PTP_TYPE(skb);
+	u8 *hdr = parse_ptp_header(skb, type);
+	__be16 *seqid;
+
+	seqid = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+	return ts_seqid == ntohs(*seqid);
+}
+
+/* Read the arrival capture block starting at @reg (status, time low, time
+ * high, sequence ID), clear the hardware status if it was valid, then hand
+ * @skb and everything still queued on @rxq to the stack.  Only the first
+ * frame whose sequence ID matches receives the hardware timestamp.
+ */
+static void mv88e6xxx_get_rxts(struct mv88e6xxx_chip *chip,
+			       struct mv88e6xxx_port_hwtstamp *ps,
+			       struct sk_buff *skb, u16 reg,
+			       struct sk_buff_head *rxq)
+{
+	u16 buf[4] = { 0 }, status, seq_id;
+	u64 ns, timelo, timehi;
+	struct skb_shared_hwtstamps *shwt;
+	int err;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+				      reg, buf, ARRAY_SIZE(buf));
+	mutex_unlock(&chip->reg_lock);
+	if (err)
+		pr_err("failed to get the receive time stamp\n");
+
+	status = buf[0];
+	timelo = buf[1];
+	timehi = buf[2];
+	seq_id = buf[3];
+
+	if (status & MV88E6XXX_PTP_TS_VALID) {
+		mutex_lock(&chip->reg_lock);
+		err = mv88e6xxx_port_ptp_write(chip, ps->port_id, reg, 0);
+		mutex_unlock(&chip->reg_lock);
+		if (err)
+			pr_err("failed to clear the receive status\n");
+	}
+	/* Since the device can only handle one time stamp at a time,
+	 * we purge any extra frames from the queue.
+	 */
+	for ( ; skb; skb = skb_dequeue(rxq)) {
+		if (mv88e6xxx_ts_valid(status) && seq_match(skb, seq_id)) {
+			ns = timehi << 16 | timelo;
+
+			mutex_lock(&chip->reg_lock);
+			ns = timecounter_cyc2time(&chip->tstamp_tc, ns);
+			mutex_unlock(&chip->reg_lock);
+			shwt = skb_hwtstamps(skb);
+			memset(shwt, 0, sizeof(*shwt));
+			shwt->hwtstamp = ns_to_ktime(ns);
+			/* Consume the timestamp: later frames go unstamped. */
+			status &= ~MV88E6XXX_PTP_TS_VALID;
+		}
+		netif_rx_ni(skb);
+	}
+}
+
+/* Drain both RX queues of a port: rx_queue is matched against arrival
+ * capture 0, rx_queue2 against arrival capture 1.
+ */
+static void mv88e6xxx_rxtstamp_work(struct mv88e6xxx_chip *chip,
+				    struct mv88e6xxx_port_hwtstamp *ps)
+{
+	struct sk_buff *skb;
+
+	skb = skb_dequeue(&ps->rx_queue);
+
+	if (skb)
+		mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR0_STS,
+				   &ps->rx_queue);
+
+	skb = skb_dequeue(&ps->rx_queue2);
+	if (skb)
+		mv88e6xxx_get_rxts(chip, ps, skb, MV88E6XXX_PORT_PTP_ARR1_STS,
+				   &ps->rx_queue2);
+}
+
+/* True when the low nibble of the first PTP header byte is 3. */
+static int is_pdelay_resp(u8 *msgtype)
+{
+	return (*msgtype & 0xf) == 3;
+}
+
+/* RX hook: returns true when the skb was queued for deferred timestamping
+ * by the PTP worker, false when the caller should deliver it as usual.
+ */
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *skb, unsigned int type)
+{
+	struct mv88e6xxx_port_hwtstamp *ps;
+	struct mv88e6xxx_chip *chip;
+	u8 *hdr;
+
+	chip = ds->priv;
+	ps = &chip->port_hwtstamp[port];
+
+	if (ps->tstamp_config.rx_filter != HWTSTAMP_FILTER_PTP_V2_EVENT)
+		return false;
+
+	hdr = mv88e6xxx_should_tstamp(chip, port, skb, type);
+	if (!hdr)
+		return false;
+
+	SKB_PTP_TYPE(skb) = type;
+
+	if (is_pdelay_resp(hdr))
+		skb_queue_tail(&ps->rx_queue2, skb);
+	else
+		skb_queue_tail(&ps->rx_queue, skb);
+
+	ptp_schedule_worker(chip->ptp_clock, 0);
+
+	return true;
+}
+
+/* Poll the departure capture block for the pending TX skb of a port.
+ *
+ * Return: 1 when the timestamp is not yet available and the work should be
+ * restarted, 0 when the skb was completed or dropped.
+ */
+static int mv88e6xxx_txtstamp_work(struct mv88e6xxx_chip *chip,
+				   struct mv88e6xxx_port_hwtstamp *ps)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	u16 departure_block[4], status;
+	struct sk_buff *tmp_skb;
+	u32 time_raw;
+	int err;
+	u64 ns;
+
+	if (!ps->tx_skb)
+		return 0;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_ptp_read(chip, ps->port_id,
+				      MV88E6XXX_PORT_PTP_DEP_STS,
+				      departure_block,
+				      ARRAY_SIZE(departure_block));
+	mutex_unlock(&chip->reg_lock);
+
+	if (err)
+		goto free_and_clear_skb;
+
+	if (!(departure_block[0] & MV88E6XXX_PTP_TS_VALID)) {
+		if (time_is_before_jiffies(ps->tx_tstamp_start +
+					   TX_TSTAMP_TIMEOUT)) {
+			dev_warn(chip->dev, "p%d: clearing tx timestamp hang\n",
+				 ps->port_id);
+			goto free_and_clear_skb;
+		}
+		/* The timestamp should be available quickly, while getting it
+		 * is high priority and time bounded by TX_TSTAMP_TIMEOUT. A
+		 * poll is warranted so restart the work.
+		 */
+		return 1;
+	}
+
+	/* We have the timestamp; go ahead and clear valid now.
+	 * NOTE(review): the result of this write is ignored.
+	 */
+	mutex_lock(&chip->reg_lock);
+	mv88e6xxx_port_ptp_write(chip, ps->port_id,
+				 MV88E6XXX_PORT_PTP_DEP_STS, 0);
+	mutex_unlock(&chip->reg_lock);
+
+	status = departure_block[0] & MV88E6XXX_PTP_TS_STATUS_MASK;
+	if (status != MV88E6XXX_PTP_TS_STATUS_NORMAL) {
+		dev_warn(chip->dev, "p%d: tx timestamp overrun\n", ps->port_id);
+		goto free_and_clear_skb;
+	}
+
+	if (departure_block[3] != ps->tx_seq_id) {
+		dev_warn(chip->dev, "p%d: unexpected seq. id\n", ps->port_id);
+		goto free_and_clear_skb;
+	}
+
+	memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+	time_raw = ((u32)departure_block[2] << 16) | departure_block[1];
+	mutex_lock(&chip->reg_lock);
+	ns = timecounter_cyc2time(&chip->tstamp_tc, time_raw);
+	mutex_unlock(&chip->reg_lock);
+	shhwtstamps.hwtstamp = ns_to_ktime(ns);
+
+	dev_dbg(chip->dev,
+		"p%d: txtstamp %llx status 0x%04x skb ID 0x%04x hw ID 0x%04x\n",
+		ps->port_id, ktime_to_ns(shhwtstamps.hwtstamp),
+		departure_block[0], ps->tx_seq_id, departure_block[3]);
+
+	/* skb_complete_tx_timestamp() will free up the client to make
+	 * another timestamp-able transmit. We have to be ready for it
+	 * -- by clearing the ps->tx_skb "flag" -- beforehand.
+	 */
+
+	tmp_skb = ps->tx_skb;
+	ps->tx_skb = NULL;
+	clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+	skb_complete_tx_timestamp(tmp_skb, &shhwtstamps);
+
+	return 0;
+
+free_and_clear_skb:
+	dev_kfree_skb_any(ps->tx_skb);
+	ps->tx_skb = NULL;
+	clear_bit_unlock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state);
+
+	return 0;
+}
+
+/* PTP auxiliary worker body: services pending TX and RX timestamps on every
+ * user port.
+ *
+ * Return: 1 when any port still waits for a TX timestamp, else -1.
+ * NOTE(review): presumably the worker reschedules on a non-negative value
+ * (delay in jiffies) and stops on a negative one — confirm against the
+ * ptp_clock_info do_aux_work contract.
+ */
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+	struct mv88e6xxx_chip *chip = ptp_to_chip(ptp);
+	struct dsa_switch *ds = chip->ds;
+	struct mv88e6xxx_port_hwtstamp *ps;
+	int i, restart = 0;
+
+	for (i = 0; i < ds->num_ports; i++) {
+		if (!dsa_is_user_port(ds, i))
+			continue;
+
+		ps = &chip->port_hwtstamp[i];
+		if (test_bit(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS, &ps->state))
+			restart |= mv88e6xxx_txtstamp_work(chip, ps);
+
+		mv88e6xxx_rxtstamp_work(chip, ps);
+	}
+
+	return restart ? 1 : -1;
+}
+
+/* TX hook: returns true when @clone was taken for timestamping.  Only one
+ * TX timestamp per port can be in flight (TX_IN_PROGRESS bit); further
+ * requests are declined until it completes.
+ */
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+	__be16 *seq_ptr;
+	u8 *hdr;
+
+	if (!(skb_shinfo(clone)->tx_flags & SKBTX_HW_TSTAMP))
+		return false;
+
+	hdr = mv88e6xxx_should_tstamp(chip, port, clone, type);
+	if (!hdr)
+		return false;
+
+	seq_ptr = (__be16 *)(hdr + OFF_PTP_SEQUENCE_ID);
+
+	if (test_and_set_bit_lock(MV88E6XXX_HWTSTAMP_TX_IN_PROGRESS,
+				  &ps->state))
+		return false;
+
+	ps->tx_skb = clone;
+	ps->tx_tstamp_start = jiffies;
+	ps->tx_seq_id = be16_to_cpup(seq_ptr);
+
+	ptp_schedule_worker(chip->ptp_clock, 0);
+	return true;
+}
+
+/* Initialise the per-port timestamping state and leave PTP disabled. */
+static int mv88e6xxx_hwtstamp_port_setup(struct mv88e6xxx_chip *chip, int port)
+{
+	struct mv88e6xxx_port_hwtstamp *ps = &chip->port_hwtstamp[port];
+
+	ps->port_id = port;
+
+	skb_queue_head_init(&ps->rx_queue);
+	skb_queue_head_init(&ps->rx_queue2);
+
+	return mv88e6xxx_port_ptp_write(chip, port, MV88E6XXX_PORT_PTP_CFG0,
+					MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP);
+}
+
+/* One-time timestamping setup: disable PTP on all ports, then program the
+ * global message-type and arrival-pointer registers.
+ */
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+	int err;
+	int i;
+
+	/* Disable timestamping on all ports. */
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
+		err = mv88e6xxx_hwtstamp_port_setup(chip, i);
+		if (err)
+			return err;
+	}
+
+	/* MV88E6XXX_PTP_MSG_TYPE is a mask of PTP message types to
+	 * timestamp. This affects all ports that have timestamping enabled,
+	 * but the timestamp config is per-port; thus we configure all events
+	 * here and only support the HWTSTAMP_FILTER_*_EVENT filter types.
+	 */
+	err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_MSGTYPE,
+				  MV88E6XXX_PTP_MSGTYPE_ALL_EVENT);
+	if (err)
+		return err;
+
+	/* Use ARRIVAL1 for peer delay response messages. */
+	err = mv88e6xxx_ptp_write(chip, MV88E6XXX_PTP_TS_ARRIVAL_PTR,
+				  MV88E6XXX_PTP_MSGTYPE_PDLAY_RES);
+	if (err)
+		return err;
+
+	/* 88E6341 devices default to timestamping at the PHY, but this has
+	 * a hardware issue that results in unreliable timestamps. Force
+	 * these devices to timestamp at the MAC.
+	 */
+	if (chip->info->family == MV88E6XXX_FAMILY_6341) {
+		u16 val = MV88E6341_PTP_CFG_UPDATE |
+			  MV88E6341_PTP_CFG_MODE_IDX |
+			  MV88E6341_PTP_CFG_MODE_TS_AT_MAC;
+		err = mv88e6xxx_ptp_write(chip, MV88E6341_PTP_CFG, val);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Nothing to release. */
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
diff --git a/drivers/net/dsa/mv88e6xxx/hwtstamp.h b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
new file mode 100644
index 000000000000..bc71c9212a08
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/hwtstamp.h
@@ -0,0 +1,172 @@
+/*
+ * Marvell 88E6xxx Switch hardware timestamping support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *	Erik Hons <erik.hons@ni.com>
+ *	Brandon Streiff <brandon.streiff@ni.com>
+ *	Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either 
version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_HWTSTAMP_H
+#define _MV88E6XXX_HWTSTAMP_H
+
+#include "chip.h"
+
+/* Global PTP registers */
+/* Offset 0x00: PTP EtherType */
+#define MV88E6XXX_PTP_ETHERTYPE	0x00
+
+/* Offset 0x01: Message Type Timestamp Enables */
+#define MV88E6XXX_PTP_MSGTYPE			0x01
+#define MV88E6XXX_PTP_MSGTYPE_SYNC		0x0001
+#define MV88E6XXX_PTP_MSGTYPE_DELAY_REQ		0x0002
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_REQ		0x0004
+#define MV88E6XXX_PTP_MSGTYPE_PDLAY_RES		0x0008
+#define MV88E6XXX_PTP_MSGTYPE_ALL_EVENT		0x000f
+
+/* Offset 0x02: Timestamp Arrival Capture Pointers */
+#define MV88E6XXX_PTP_TS_ARRIVAL_PTR	0x02
+
+/* Offset 0x07: PTP Global Configuration */
+#define MV88E6341_PTP_CFG			0x07
+#define MV88E6341_PTP_CFG_UPDATE		0x8000
+#define MV88E6341_PTP_CFG_IDX_MASK		0x7f00
+#define MV88E6341_PTP_CFG_DATA_MASK		0x00ff
+#define MV88E6341_PTP_CFG_MODE_IDX		0x0
+#define MV88E6341_PTP_CFG_MODE_TS_AT_PHY	0x00
+#define MV88E6341_PTP_CFG_MODE_TS_AT_MAC	0x80
+
+/* Offset 0x08: PTP Interrupt Status */
+#define MV88E6XXX_PTP_IRQ_STATUS	0x08
+
+/* Per-Port PTP Registers */
+/* Offset 0x00: PTP Configuration 0 */
+#define MV88E6XXX_PORT_PTP_CFG0				0x00
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_SHIFT		12
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_MASK		0xf000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_1588		0x0000
+#define MV88E6XXX_PORT_PTP_CFG0_TSPEC_8021AS		0x1000
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_TSPEC_MATCH	0x0800
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_OVERWRITE	0x0002
+#define MV88E6XXX_PORT_PTP_CFG0_DISABLE_PTP		0x0001
+
+/* Offset 0x01: PTP Configuration 1 */
+#define MV88E6XXX_PORT_PTP_CFG1	0x01
+
+/* Offset 0x02: PTP Configuration 2 */
+#define MV88E6XXX_PORT_PTP_CFG2				0x02
+#define MV88E6XXX_PORT_PTP_CFG2_EMBED_ARRIVAL		0x1000
+#define MV88E6XXX_PORT_PTP_CFG2_DEP_IRQ_EN		0x0002
+#define MV88E6XXX_PORT_PTP_CFG2_ARR_IRQ_EN		0x0001
+
+/* Offset 0x03: PTP LED Configuration */
+#define MV88E6XXX_PORT_PTP_LED_CFG	0x03
+
+/* Offset 0x08: PTP Arrival 0 Status */
+#define MV88E6XXX_PORT_PTP_ARR0_STS	0x08
+
+/* Offset 0x09/0x0A: PTP Arrival 0 Time */
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_LO	0x09
+#define MV88E6XXX_PORT_PTP_ARR0_TIME_HI	0x0a
+
+/* Offset 0x0B: PTP Arrival 0 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR0_SEQID	0x0b
+
+/* Offset 0x0C: PTP Arrival 1 Status */
+#define MV88E6XXX_PORT_PTP_ARR1_STS	0x0c
+
+/* Offset 0x0D/0x0E: PTP Arrival 1 Time */
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_LO	0x0d
+#define MV88E6XXX_PORT_PTP_ARR1_TIME_HI	0x0e
+
+/* Offset 0x0F: PTP Arrival 1 Sequence ID */
+#define MV88E6XXX_PORT_PTP_ARR1_SEQID	0x0f
+
+/* Offset 0x10: PTP Departure Status */
+#define MV88E6XXX_PORT_PTP_DEP_STS	0x10
+
+/* Offset 0x11/0x12: PTP Departure Time */
+#define MV88E6XXX_PORT_PTP_DEP_TIME_LO	0x11
+#define MV88E6XXX_PORT_PTP_DEP_TIME_HI	0x12
+
+/* Offset 0x13: PTP Departure Sequence ID */
+#define MV88E6XXX_PORT_PTP_DEP_SEQID	0x13
+
+/* Status fields for arrival and departure timestamp status registers */
+#define MV88E6XXX_PTP_TS_STATUS_MASK		0x0006
+#define MV88E6XXX_PTP_TS_STATUS_NORMAL		0x0000
+#define MV88E6XXX_PTP_TS_STATUS_OVERWITTEN	0x0002
+#define MV88E6XXX_PTP_TS_STATUS_DISCARDED	0x0004
+#define MV88E6XXX_PTP_TS_VALID			0x0001
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds, int port,
+				struct ifreq *ifr);
+int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds, int port,
+				struct ifreq *ifr);
+
+bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type);
+bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+			     struct sk_buff *clone, unsigned int type);
+
+int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+			  struct ethtool_ts_info *info);
+
+int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+/* Stubs used when PTP support is compiled out: the ioctl and ethtool hooks
+ * report -EOPNOTSUPP, the RX/TX hooks never take a packet, and setup/free
+ * are no-ops.
+ */
+
+static inline int mv88e6xxx_port_hwtstamp_set(struct dsa_switch *ds,
+					      int port, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_port_hwtstamp_get(struct dsa_switch *ds,
+					      int port, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool mv88e6xxx_port_rxtstamp(struct dsa_switch *ds, int port,
+					   struct sk_buff *clone,
+					   unsigned int type)
+{
+	return false;
+}
+
+static inline bool mv88e6xxx_port_txtstamp(struct dsa_switch *ds, int port,
+					   struct sk_buff *clone,
+					   unsigned int type)
+{
+	return false;
+}
+
+static inline int mv88e6xxx_get_ts_info(struct dsa_switch *ds, int port,
+					struct ethtool_ts_info *info)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_hwtstamp_setup(struct mv88e6xxx_chip *chip)
+{
+	return 0;
+}
+
+static inline void mv88e6xxx_hwtstamp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_HWTSTAMP_H */
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
new file mode 100644
index 000000000000..bd85e2c390e1
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -0,0 +1,381 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *	Erik Hons <erik.hons@ni.com>
+ *	Brandon Streiff <brandon.streiff@ni.com>
+ *	Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "chip.h"
+#include "global2.h"
+#include "ptp.h"
+
+/* Raw timestamps are in units of 8-ns clock periods.
*/ +#define CC_SHIFT 28 +#define CC_MULT (8 << CC_SHIFT) +#define CC_MULT_NUM (1 << 9) +#define CC_MULT_DEM 15625ULL + +#define TAI_EVENT_WORK_INTERVAL msecs_to_jiffies(100) + +#define cc_to_chip(cc) container_of(cc, struct mv88e6xxx_chip, tstamp_cc) +#define dw_overflow_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \ + overflow_work) +#define dw_tai_event_to_chip(dw) container_of(dw, struct mv88e6xxx_chip, \ + tai_event_work) + +static int mv88e6xxx_tai_read(struct mv88e6xxx_chip *chip, int addr, + u16 *data, int len) +{ + if (!chip->info->ops->avb_ops->tai_read) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->tai_read(chip, addr, data, len); +} + +static int mv88e6xxx_tai_write(struct mv88e6xxx_chip *chip, int addr, u16 data) +{ + if (!chip->info->ops->avb_ops->tai_write) + return -EOPNOTSUPP; + + return chip->info->ops->avb_ops->tai_write(chip, addr, data); +} + +/* TODO: places where this are called should be using pinctrl */ +static int mv88e6xxx_set_gpio_func(struct mv88e6xxx_chip *chip, int pin, + int func, int input) +{ + int err; + + if (!chip->info->ops->gpio_ops) + return -EOPNOTSUPP; + + err = chip->info->ops->gpio_ops->set_dir(chip, pin, input); + if (err) + return err; + + return chip->info->ops->gpio_ops->set_pctl(chip, pin, func); +} + +static u64 mv88e6xxx_ptp_clock_read(const struct cyclecounter *cc) +{ + struct mv88e6xxx_chip *chip = cc_to_chip(cc); + u16 phc_time[2]; + int err; + + err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_TIME_LO, phc_time, + ARRAY_SIZE(phc_time)); + if (err) + return 0; + else + return ((u32)phc_time[1] << 16) | phc_time[0]; +} + +/* mv88e6xxx_config_eventcap - configure TAI event capture + * @event: PTP_CLOCK_PPS (internal) or PTP_CLOCK_EXTTS (external) + * @rising: zero for falling-edge trigger, else rising-edge trigger + * + * This will also reset the capture sequence counter. 
+ */ +static int mv88e6xxx_config_eventcap(struct mv88e6xxx_chip *chip, int event, + int rising) +{ + u16 global_config; + u16 cap_config; + int err; + + chip->evcap_config = MV88E6XXX_TAI_CFG_CAP_OVERWRITE | + MV88E6XXX_TAI_CFG_CAP_CTR_START; + if (!rising) + chip->evcap_config |= MV88E6XXX_TAI_CFG_EVREQ_FALLING; + + global_config = (chip->evcap_config | chip->trig_config); + err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_CFG, global_config); + if (err) + return err; + + if (event == PTP_CLOCK_PPS) { + cap_config = MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG; + } else if (event == PTP_CLOCK_EXTTS) { + /* if STATUS_CAP_TRIG is unset we capture PTP_EVREQ events */ + cap_config = 0; + } else { + return -EINVAL; + } + + /* Write the capture config; this also clears the capture counter */ + err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, + cap_config); + + return err; +} + +static void mv88e6xxx_tai_event_work(struct work_struct *ugly) +{ + struct delayed_work *dw = to_delayed_work(ugly); + struct mv88e6xxx_chip *chip = dw_tai_event_to_chip(dw); + struct ptp_clock_event ev; + u16 status[4]; + u32 raw_ts; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_tai_read(chip, MV88E6XXX_TAI_EVENT_STATUS, + status, ARRAY_SIZE(status)); + mutex_unlock(&chip->reg_lock); + + if (err) { + dev_err(chip->dev, "failed to read TAI status register\n"); + return; + } + if (status[0] & MV88E6XXX_TAI_EVENT_STATUS_ERROR) { + dev_warn(chip->dev, "missed event capture\n"); + return; + } + if (!(status[0] & MV88E6XXX_TAI_EVENT_STATUS_VALID)) + goto out; + + raw_ts = ((u32)status[2] << 16) | status[1]; + + /* Clear the valid bit so the next timestamp can come in */ + status[0] &= ~MV88E6XXX_TAI_EVENT_STATUS_VALID; + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_tai_write(chip, MV88E6XXX_TAI_EVENT_STATUS, status[0]); + mutex_unlock(&chip->reg_lock); + + /* This is an external timestamp */ + ev.type = PTP_CLOCK_EXTTS; + + /* We only have one timestamping channel. 
*/ + ev.index = 0; + mutex_lock(&chip->reg_lock); + ev.timestamp = timecounter_cyc2time(&chip->tstamp_tc, raw_ts); + mutex_unlock(&chip->reg_lock); + + ptp_clock_event(chip->ptp_clock, &ev); +out: + schedule_delayed_work(&chip->tai_event_work, TAI_EVENT_WORK_INTERVAL); +} + +static int mv88e6xxx_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + int neg_adj = 0; + u32 diff, mult; + u64 adj; + + if (scaled_ppm < 0) { + neg_adj = 1; + scaled_ppm = -scaled_ppm; + } + mult = CC_MULT; + adj = CC_MULT_NUM; + adj *= scaled_ppm; + diff = div_u64(adj, CC_MULT_DEM); + + mutex_lock(&chip->reg_lock); + + timecounter_read(&chip->tstamp_tc); + chip->tstamp_cc.mult = neg_adj ? mult - diff : mult + diff; + + mutex_unlock(&chip->reg_lock); + + return 0; +} + +static int mv88e6xxx_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + + mutex_lock(&chip->reg_lock); + timecounter_adjtime(&chip->tstamp_tc, delta); + mutex_unlock(&chip->reg_lock); + + return 0; +} + +static int mv88e6xxx_ptp_gettime(struct ptp_clock_info *ptp, + struct timespec64 *ts) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + u64 ns; + + mutex_lock(&chip->reg_lock); + ns = timecounter_read(&chip->tstamp_tc); + mutex_unlock(&chip->reg_lock); + + *ts = ns_to_timespec64(ns); + + return 0; +} + +static int mv88e6xxx_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + u64 ns; + + ns = timespec64_to_ns(ts); + + mutex_lock(&chip->reg_lock); + timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc, ns); + mutex_unlock(&chip->reg_lock); + + return 0; +} + +static int mv88e6xxx_ptp_enable_extts(struct mv88e6xxx_chip *chip, + struct ptp_clock_request *rq, int on) +{ + int rising = (rq->extts.flags & PTP_RISING_EDGE); + int func; + int pin; + int err; + + pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index); + + if (pin < 0) 
+ return -EBUSY; + + mutex_lock(&chip->reg_lock); + + if (on) { + func = MV88E6352_G2_SCRATCH_GPIO_PCTL_EVREQ; + + err = mv88e6xxx_set_gpio_func(chip, pin, func, true); + if (err) + goto out; + + schedule_delayed_work(&chip->tai_event_work, + TAI_EVENT_WORK_INTERVAL); + + err = mv88e6xxx_config_eventcap(chip, PTP_CLOCK_EXTTS, rising); + } else { + func = MV88E6352_G2_SCRATCH_GPIO_PCTL_GPIO; + + err = mv88e6xxx_set_gpio_func(chip, pin, func, true); + + cancel_delayed_work_sync(&chip->tai_event_work); + } + +out: + mutex_unlock(&chip->reg_lock); + + return err; +} + +static int mv88e6xxx_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct mv88e6xxx_chip *chip = ptp_to_chip(ptp); + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + return mv88e6xxx_ptp_enable_extts(chip, rq, on); + default: + return -EOPNOTSUPP; + } +} + +static int mv88e6xxx_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + break; + case PTP_PF_PEROUT: + case PTP_PF_PHYSYNC: + return -EOPNOTSUPP; + } + return 0; +} + +/* With a 125MHz input clock, the 32-bit timestamp counter overflows in ~34.3 + * seconds; this task forces periodic reads so that we don't miss any. 
+ */
+#define MV88E6XXX_TAI_OVERFLOW_PERIOD (HZ * 16)
+static void mv88e6xxx_ptp_overflow_check(struct work_struct *work)
+{
+	struct delayed_work *dw = to_delayed_work(work);
+	struct mv88e6xxx_chip *chip = dw_overflow_to_chip(dw);
+	struct timespec64 ts;
+
+	/* The value is unused; the read itself is what matters here */
+	mv88e6xxx_ptp_gettime(&chip->ptp_clock_info, &ts);
+
+	schedule_delayed_work(&chip->overflow_work,
+			      MV88E6XXX_TAI_OVERFLOW_PERIOD);
+}
+
+/* Set up the cycle counter/timecounter, fill in the PTP clock description
+ * (one external timestamp channel, one pin descriptor per GPIO), register
+ * the clock and start the periodic overflow check.
+ *
+ * Returns 0 on success or the error from ptp_clock_register().
+ */
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+	int i;
+
+	/* Set up the cycle counter */
+	memset(&chip->tstamp_cc, 0, sizeof(chip->tstamp_cc));
+	chip->tstamp_cc.read	= mv88e6xxx_ptp_clock_read;
+	chip->tstamp_cc.mask	= CYCLECOUNTER_MASK(32);
+	chip->tstamp_cc.mult	= CC_MULT;
+	chip->tstamp_cc.shift	= CC_SHIFT;
+
+	timecounter_init(&chip->tstamp_tc, &chip->tstamp_cc,
+			 ktime_to_ns(ktime_get_real()));
+
+	INIT_DELAYED_WORK(&chip->overflow_work, mv88e6xxx_ptp_overflow_check);
+	INIT_DELAYED_WORK(&chip->tai_event_work, mv88e6xxx_tai_event_work);
+
+	chip->ptp_clock_info.owner = THIS_MODULE;
+	/* Never use the device name itself as the format string */
+	snprintf(chip->ptp_clock_info.name, sizeof(chip->ptp_clock_info.name),
+		 "%s", dev_name(chip->dev));
+	chip->ptp_clock_info.max_adj	= 1000000;
+
+	chip->ptp_clock_info.n_ext_ts	= 1;
+	chip->ptp_clock_info.n_per_out	= 0;
+	chip->ptp_clock_info.n_pins	= mv88e6xxx_num_gpio(chip);
+	chip->ptp_clock_info.pps	= 0;
+
+	for (i = 0; i < chip->ptp_clock_info.n_pins; ++i) {
+		struct ptp_pin_desc *ppd = &chip->pin_config[i];
+
+		snprintf(ppd->name, sizeof(ppd->name), "mv88e6xxx_gpio%d", i);
+		ppd->index = i;
+		ppd->func = PTP_PF_NONE;
+	}
+	chip->ptp_clock_info.pin_config = chip->pin_config;
+
+	chip->ptp_clock_info.adjfine	= mv88e6xxx_ptp_adjfine;
+	chip->ptp_clock_info.adjtime	= mv88e6xxx_ptp_adjtime;
+	chip->ptp_clock_info.gettime64	= mv88e6xxx_ptp_gettime;
+	chip->ptp_clock_info.settime64	= mv88e6xxx_ptp_settime;
+	chip->ptp_clock_info.enable	= mv88e6xxx_ptp_enable;
+	chip->ptp_clock_info.verify	= mv88e6xxx_ptp_verify;
+	chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
+
+	chip->ptp_clock = ptp_clock_register(&chip->ptp_clock_info, chip->dev);
+	if (IS_ERR(chip->ptp_clock)) {
+		int err = PTR_ERR(chip->ptp_clock);
+
+		/* Don't leave an ERR_PTR behind: mv88e6xxx_ptp_free() treats
+		 * any non-NULL ptp_clock as a registered clock.
+		 */
+		chip->ptp_clock = NULL;
+		return err;
+	}
+
+	schedule_delayed_work(&chip->overflow_work,
+			      MV88E6XXX_TAI_OVERFLOW_PERIOD);
+
+	return 0;
+}
+
+/* Stop the periodic work items and unregister the PTP clock, if one was
+ * registered.
+ */
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+	if (chip->ptp_clock) {
+		cancel_delayed_work_sync(&chip->overflow_work);
+		cancel_delayed_work_sync(&chip->tai_event_work);
+
+		ptp_clock_unregister(chip->ptp_clock);
+		chip->ptp_clock = NULL;
+	}
+}
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.h b/drivers/net/dsa/mv88e6xxx/ptp.h
new file mode 100644
index 000000000000..992818ade746
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/ptp.h
@@ -0,0 +1,108 @@
+/*
+ * Marvell 88E6xxx Switch PTP support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2017 National Instruments
+ *      Erik Hons <erik.hons@ni.com>
+ *      Brandon Streiff <brandon.streiff@ni.com>
+ *      Dane Wagner <dane.wagner@ni.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PTP_H
+#define _MV88E6XXX_PTP_H
+
+#include "chip.h"
+
+/* Offset 0x00: TAI Global Config */
+#define MV88E6XXX_TAI_CFG			0x00
+#define MV88E6XXX_TAI_CFG_CAP_OVERWRITE		0x8000
+#define MV88E6XXX_TAI_CFG_CAP_CTR_START		0x4000
+#define MV88E6XXX_TAI_CFG_EVREQ_FALLING		0x2000
+#define MV88E6XXX_TAI_CFG_TRIG_ACTIVE_LO	0x1000
+#define MV88E6XXX_TAI_CFG_IRL_ENABLE		0x0400
+#define MV88E6XXX_TAI_CFG_TRIG_IRQ_EN		0x0200
+#define MV88E6XXX_TAI_CFG_EVREQ_IRQ_EN		0x0100
+#define MV88E6XXX_TAI_CFG_TRIG_LOCK		0x0080
+#define MV88E6XXX_TAI_CFG_BLOCK_UPDATE		0x0008
+#define MV88E6XXX_TAI_CFG_MULTI_PTP		0x0004
+#define MV88E6XXX_TAI_CFG_TRIG_MODE_ONESHOT	0x0002
+#define MV88E6XXX_TAI_CFG_TRIG_ENABLE		0x0001
+
+/* Offset 0x01: Timestamp Clock Period (ps) */
+#define MV88E6XXX_TAI_CLOCK_PERIOD		0x01
+
+/* Offset 0x02/0x03: Trigger Generation Amount */
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_LO	0x02
+#define MV88E6XXX_TAI_TRIG_GEN_AMOUNT_HI	0x03
+
+/* Offset 0x04: Clock Compensation */
+#define MV88E6XXX_TAI_TRIG_CLOCK_COMP		0x04
+
+/* Offset 0x05: Trigger Configuration */
+#define MV88E6XXX_TAI_TRIG_CFG			0x05
+
+/* Offset 0x06: Ingress Rate Limiter Clock Generation Amount */
+#define MV88E6XXX_TAI_IRL_AMOUNT		0x06
+
+/* Offset 0x07: Ingress Rate Limiter Compensation */
+#define MV88E6XXX_TAI_IRL_COMP			0x07
+
+/* Offset 0x08: Ingress Rate Limiter Compensation */
+/* NOTE(review): same description as 0x07; the _PS suffix suggests a
+ * picosecond part -- confirm against the datasheet.
+ */
+#define MV88E6XXX_TAI_IRL_COMP_PS		0x08
+
+/* Offset 0x09: Event Status */
+#define MV88E6XXX_TAI_EVENT_STATUS		0x09
+#define MV88E6XXX_TAI_EVENT_STATUS_CAP_TRIG	0x4000
+#define MV88E6XXX_TAI_EVENT_STATUS_ERROR	0x0200
+#define MV88E6XXX_TAI_EVENT_STATUS_VALID	0x0100
+#define MV88E6XXX_TAI_EVENT_STATUS_CTR_MASK	0x00ff
+
+/* Offset 0x0A/0x0B: Event Time */
+/* NOTE(review): the high half is named ..._TYPE_HI although it belongs to
+ * the Event Time pair; presumably ..._TIME_HI was meant -- confirm before
+ * renaming.
+ */
+#define MV88E6XXX_TAI_EVENT_TIME_LO		0x0a
+#define MV88E6XXX_TAI_EVENT_TYPE_HI		0x0b
+
+/* Offset 0x0E/0x0F: PTP Global Time */
+#define MV88E6XXX_TAI_TIME_LO			0x0e
+#define MV88E6XXX_TAI_TIME_HI			0x0f
+
+/* Offset 0x10/0x11: Trig Generation Time */
+#define MV88E6XXX_TAI_TRIG_TIME_LO		0x10
+#define MV88E6XXX_TAI_TRIG_TIME_HI		0x11
+
+/* Offset 0x12: Lock Status */
+#define MV88E6XXX_TAI_LOCK_STATUS		0x12
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_PTP
+
+long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp);
+int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip);
+
+#define ptp_to_chip(ptp) container_of(ptp, struct mv88e6xxx_chip,	\
+				      ptp_clock_info)
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+/* Stubs used when PTP support is compiled out. They are static inline so
+ * that files including this header without calling them do not get
+ * unused-function warnings.
+ */
+static inline long mv88e6xxx_hwtstamp_work(struct ptp_clock_info *ptp)
+{
+	return -1;
+}
+
+static inline int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
+{
+	return 0;
+}
+
+static inline void mv88e6xxx_ptp_free(struct mv88e6xxx_chip *chip)
+{
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_PTP */
+
+#endif /* _MV88E6XXX_PTP_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 00a1d2d13169..9da6f57901a9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type,
 				       &payload->start, &payload->end);
 }
 
+static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win,
+			     int mtype, u32 addr, u32 len, void *hbuf)
+{
+	u32 win_pf, memoffset, mem_aperture, mem_base;
+	struct adapter *adap = pdbg_init->adap;
+	u32 pos, offset, resid;
+	u32 *res_buf;
+	u64 *buf;
+	int ret;
+
+	/* Argument sanity checks ...
+	 */
+	if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+		return -EINVAL;
+
+	buf = (u64 *)hbuf;
+
+	/* Try to do 64-bit reads.  Residual will be handled later. */
+	resid = len & 0x7;
+	len -= resid;
+
+	ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base,
+				&mem_aperture);
+	if (ret)
+		return ret;
+
+	addr = addr + memoffset;
+	win_pf = is_t4(adap->params.chip) ?
0 : PFNUM_V(adap->pf);
+
+	pos = addr & ~(mem_aperture - 1);
+	offset = addr - pos;
+
+	/* Set up initial PCI-E Memory Window to cover the start of our
+	 * transfer.
+	 */
+	t4_memory_update_win(adap, win, pos | win_pf);
+
+	/* Transfer data from the adapter */
+	while (len > 0) {
+		*buf++ = le64_to_cpu((__force __le64)
+				     t4_read_reg64(adap, mem_base + offset));
+		offset += sizeof(u64);
+		len -= sizeof(u64);
+
+		/* If we've reached the end of our current window aperture,
+		 * move the PCI-E Memory Window on to the next.
+		 */
+		if (offset == mem_aperture) {
+			pos += mem_aperture;
+			offset = 0;
+			t4_memory_update_win(adap, win, pos | win_pf);
+		}
+	}
+
+	res_buf = (u32 *)buf;
+	/* Read residual in 32-bit multiples.  The comparison must be ">=":
+	 * a residual of exactly 4 bytes is one whole word, and the sub-word
+	 * helper below copies nothing when handed 4.
+	 */
+	while (resid >= sizeof(u32)) {
+		*res_buf++ = le32_to_cpu((__force __le32)
+					 t4_read_reg(adap, mem_base + offset));
+		offset += sizeof(u32);
+		resid -= sizeof(u32);
+
+		/* If we've reached the end of our current window aperture,
+		 * move the PCI-E Memory Window on to the next.
+		 */
+		if (offset == mem_aperture) {
+			pos += mem_aperture;
+			offset = 0;
+			t4_memory_update_win(adap, win, pos | win_pf);
+		}
+	}
+
+	/* Transfer residual < 32-bits */
+	if (resid)
+		t4_memory_rw_residual(adap, resid, mem_base + offset,
+				      (u8 *)res_buf, T4_MEMORY_READ);
+
+	return 0;
+}
+
 #define CUDBG_YIELD_ITERATION 256
 
 static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
@@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
 				goto skip_read;
 
 		spin_lock(&padap->win0_lock);
-		rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
-				  bytes_read, bytes,
-				  (__be32 *)temp_buff.data,
-				  1);
+		rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type,
+				       bytes_read, bytes, temp_buff.data);
 		spin_unlock(&padap->win0_lock);
 		if (rc) {
 			cudbg_err->sys_err = rc;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13ce4b7..d3fa53db61ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg);
 u32 t4_get_util_window(struct adapter *adap);
 void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window);
 
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+		      u32 *mem_base, u32 *mem_aperture);
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr);
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+			   int dir);
 #define T4_MEMORY_WRITE	0
 #define T4_MEMORY_READ	1
 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 920bccd6bc40..bd41f93f73ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx)
 }
 
 /**
+ *	t4_memory_rw_init - Get memory window relative
offset, base, and size.
+ *	@adap: the adapter
+ *	@win: PCI-E Memory Window to use
+ *	@mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ *	@mem_off: memory relative offset with respect to @mtype.
+ *	@mem_base: configured memory base address.
+ *	@mem_aperture: configured memory window aperture.
+ *
+ *	Get the configured memory window's relative offset, base, and size.
+ *	Returns 0 on success, or -ENXIO if the window register reads back as
+ *	0xffffffff.
+ */
+int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off,
+		      u32 *mem_base, u32 *mem_aperture)
+{
+	u32 edc_size, mc_size, mem_reg;
+
+	/* Offset into the region of memory which is being accessed
+	 * MEM_EDC0 = 0
+	 * MEM_EDC1 = 1
+	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
+	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
+	 * MEM_HMA  = 4
+	 */
+	edc_size  = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A));
+	if (mtype == MEM_HMA) {
+		*mem_off = 2 * (edc_size * 1024 * 1024);
+	} else if (mtype != MEM_MC1) {
+		*mem_off = (mtype * (edc_size * 1024 * 1024));
+	} else {
+		mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap,
+						      MA_EXT_MEMORY0_BAR_A));
+		*mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024;
+	}
+
+	/* Each PCI-E Memory Window is programmed with a window size -- or
+	 * "aperture" -- which controls the granularity of its mapping onto
+	 * adapter memory.  We need to grab that aperture in order to know
+	 * how to use the specified window.  The window is also programmed
+	 * with the base address of the Memory Window in BAR0's address
+	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
+	 * the address is relative to BAR0.
+	 */
+	mem_reg = t4_read_reg(adap,
+			      PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A,
+						  win));
+	/* a dead adapter will return 0xffffffff for PIO reads */
+	if (mem_reg == 0xffffffff)
+		return -ENXIO;
+
+	*mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X);
+	*mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X;
+	if (is_t4(adap->params.chip))
+		*mem_base -= adap->t4_bar0;
+
+	return 0;
+}
+
+/**
+ *	t4_memory_update_win - Move memory window to specified address.
+ *	@adap: the adapter
+ *	@win: PCI-E Memory Window to use
+ *	@addr: location to move.
+ *
+ *	Move memory window to specified address.
+ */
+void t4_memory_update_win(struct adapter *adap, int win, u32 addr)
+{
+	t4_write_reg(adap,
+		     PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win),
+		     addr);
+	/* Read it back to ensure that changes propagate before we
+	 * attempt to use the new value.
+	 */
+	t4_read_reg(adap,
+		    PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win));
+}
+
+/**
+ *	t4_memory_rw_residual - Read/Write residual data.
+ *	@adap: the adapter
+ *	@off: relative offset within residual to start read/write.
+ *	@addr: address within indicated memory type.
+ *	@buf: host memory buffer
+ *	@dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ *	Read/Write residual data less than 32-bits.
+ */
+void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf,
+			   int dir)
+{
+	union {
+		u32 word;
+		char byte[4];
+	} last;
+	unsigned char *bp;
+	int i;
+
+	if (dir == T4_MEMORY_READ) {
+		last.word = le32_to_cpu((__force __le32)
+					t4_read_reg(adap, addr));
+		/* NOTE(review): this copies bytes [off, 4) of the word, not
+		 * [0, off); kept as-is -- confirm the intended range.
+		 */
+		for (bp = (unsigned char *)buf, i = off; i < 4; i++)
+			bp[i] = last.byte[i];
+	} else {
+		/* Build the word bytewise: the first @off bytes come from
+		 * @buf, the rest are zero.  Dereferencing the u8 pointer
+		 * directly would load only buf[0].
+		 */
+		for (i = 0; i < 4; i++)
+			last.byte[i] = (u32)i < off ? buf[i] : 0;
+		t4_write_reg(adap, addr,
+			     (__force u32)cpu_to_le32(last.word));
+	}
+}
+
+/**
  *	t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window
  *	@adap: the adapter
  *	@win: PCI-E Memory Window to use
@@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
 		 u32 len, void *hbuf, int dir)
 {
 	u32 pos, offset, resid, memoffset;
-	u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base;
+	u32 win_pf, mem_aperture, mem_base;
 	u32 *buf;
+	int ret;
 
 	/* Argument sanity checks ...
 	 */
@@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr,
 	resid = len & 0x3;
 	len -= resid;
 
-	/* Offset into the region of memory which is being accessed
-	 * MEM_EDC0 = 0
-	 * MEM_EDC1 = 1
-	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
-	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g.
T5) - * MEM_HMA = 4 - */ - edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); - if (mtype == MEM_HMA) { - memoffset = 2 * (edc_size * 1024 * 1024); - } else if (mtype != MEM_MC1) { - memoffset = (mtype * (edc_size * 1024 * 1024)); - } else { - mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, - MA_EXT_MEMORY0_BAR_A)); - memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; - } + ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, + &mem_aperture); + if (ret) + return ret; /* Determine the PCIE_MEM_ACCESS_OFFSET */ addr = addr + memoffset; - /* Each PCI-E Memory Window is programmed with a window size -- or - * "aperture" -- which controls the granularity of its mapping onto - * adapter memory. We need to grab that aperture in order to know - * how to use the specified window. The window is also programmed - * with the base address of the Memory Window in BAR0's address - * space. For T4 this is an absolute PCI-E Bus Address. For T5 - * the address is relative to BAR0. - */ - mem_reg = t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, - win)); - mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); - mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; - if (is_t4(adap->params.chip)) - mem_base -= adap->t4_bar0; win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); /* Calculate our initial PCI-E Memory Window Position and Offset into * that Window. */ - pos = addr & ~(mem_aperture-1); + pos = addr & ~(mem_aperture - 1); offset = addr - pos; /* Set up initial PCI-E Memory Window to cover the start of our - * transfer. (Read it back to ensure that changes propagate before we - * attempt to use the new value.) + * transfer. 
*/ - t4_write_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), - pos | win_pf); - t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); + t4_memory_update_win(adap, win, pos | win_pf); /* Transfer data to/from the adapter as long as there's an integral * number of 32-bit transfers to complete. @@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, if (offset == mem_aperture) { pos += mem_aperture; offset = 0; - t4_write_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, - win), pos | win_pf); - t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, - win)); + t4_memory_update_win(adap, win, pos | win_pf); } } @@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, * residual amount. The PCI-E Memory Window has already been moved * above (if necessary) to cover this final transfer. */ - if (resid) { - union { - u32 word; - char byte[4]; - } last; - unsigned char *bp; - int i; - - if (dir == T4_MEMORY_READ) { - last.word = le32_to_cpu( - (__force __le32)t4_read_reg(adap, - mem_base + offset)); - for (bp = (unsigned char *)buf, i = resid; i < 4; i++) - bp[i] = last.byte[i]; - } else { - last.word = *buf; - for (i = resid; i < 4; i++) - last.byte[i] = 0; - t4_write_reg(adap, mem_base + offset, - (__force u32)cpu_to_le32(last.word)); - } - } + if (resid) + t4_memory_rw_residual(adap, resid, mem_base + offset, + (u8 *)buf, dir); return 0; } diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 7caa8da48421..a998c36c5e61 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2766,7 +2766,7 @@ static int dpaa_eth_probe(struct platform_device *pdev) priv->channel = (u16)channel; - /* Start a thread that will walk the CPUs with affine portals + /* Walk the CPUs with affine portals * and add this pool channel to each's dequeue mask. 
*/ dpaa_eth_add_channel(priv->channel); diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index faea674094b9..85306d1b2acf 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -211,7 +211,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev, if (epause->rx_pause) newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause; if (epause->tx_pause) - newadv |= ADVERTISED_Asym_Pause; + newadv ^= ADVERTISED_Asym_Pause; oldadv = phydev->advertising & (ADVERTISED_Pause | ADVERTISED_Asym_Pause); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 46e9f4e0a02c..36d9401a6258 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -507,6 +507,7 @@ struct i40e_pf { #define I40E_HW_STOP_FW_LLDP BIT(16) #define I40E_HW_PORT_ID_VALID BIT(17) #define I40E_HW_RESTART_AUTONEG BIT(18) +#define I40E_HW_STOPPABLE_FW_LLDP BIT(19) u64 flags; #define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0) @@ -824,6 +825,7 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ cpumask_t affinity_mask; @@ -832,8 +834,6 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 should adjust ITR */ } ____cacheline_internodealigned_in_smp; /* lan device */ @@ -1109,4 +1109,10 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate); +int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); 
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add); #endif /* _I40E_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 4c3b4243cf65..b829fd365693 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -155,8 +155,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", (unsigned long int)nd->vlan_features); } - dev_info(&pf->pdev->dev, - " vlgrp: & = %p\n", vsi->active_vlans); + dev_info(&pf->pdev->dev, " active_vlans is %s\n", + vsi->active_vlans ? "<valid>" : "<null>"); dev_info(&pf->pdev->dev, " flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags); @@ -270,14 +270,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) continue; dev_info(&pf->pdev->dev, - " rx_rings[%i]: desc = %p\n", - i, rx_ring->desc); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n", - i, rx_ring->dev, - rx_ring->netdev, - rx_ring->rx_bi); - dev_info(&pf->pdev->dev, " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", i, *rx_ring->state, rx_ring->queue_index, @@ -307,17 +299,12 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) rx_ring->rx_stats.realloc_count, rx_ring->rx_stats.page_reuse_count); dev_info(&pf->pdev->dev, - " rx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, rx_ring->size, - (unsigned long int)rx_ring->dma); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: vsi = %p, q_vector = %p\n", - i, rx_ring->vsi, - rx_ring->q_vector); + " rx_rings[%i]: size = %i\n", + i, rx_ring->size); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_itr_setting = %d (%s)\n", - i, rx_ring->rx_itr_setting, - ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? 
"dynamic" : "fixed"); + " rx_rings[%i]: itr_setting = %d (%s)\n", + i, rx_ring->itr_setting, + ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); @@ -326,14 +313,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) continue; dev_info(&pf->pdev->dev, - " tx_rings[%i]: desc = %p\n", - i, tx_ring->desc); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n", - i, tx_ring->dev, - tx_ring->netdev, - tx_ring->tx_bi); - dev_info(&pf->pdev->dev, " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n", i, *tx_ring->state, tx_ring->queue_index, @@ -355,20 +334,15 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) tx_ring->tx_stats.tx_busy, tx_ring->tx_stats.tx_done_old); dev_info(&pf->pdev->dev, - " tx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, tx_ring->size, - (unsigned long int)tx_ring->dma); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: vsi = %p, q_vector = %p\n", - i, tx_ring->vsi, - tx_ring->q_vector); + " tx_rings[%i]: size = %i\n", + i, tx_ring->size); dev_info(&pf->pdev->dev, " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_itr_setting = %d (%s)\n", - i, tx_ring->tx_itr_setting, - ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); + " tx_rings[%i]: itr_setting = %d (%s)\n", + i, tx_ring->itr_setting, + ITR_IS_DYNAMIC(tx_ring->itr_setting) ? 
"dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, @@ -466,8 +440,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->info.resp_reserved[6], vsi->info.resp_reserved[7], vsi->info.resp_reserved[8], vsi->info.resp_reserved[9], vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]); - if (vsi->back) - dev_info(&pf->pdev->dev, " PF = %p\n", vsi->back); dev_info(&pf->pdev->dev, " idx = %d\n", vsi->idx); dev_info(&pf->pdev->dev, " tc_config: numtc = %d, enabled_tc = 0x%x\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 2f5bee713fef..0dcbbda164c4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev, rx_ring = vsi->rx_rings[queue]; tx_ring = vsi->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2311,34 +2311,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; - u16 vector, intrl; + u16 intrl; intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit); - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + 
tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); if (ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; if (ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. 
+ */ + + wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl); i40e_flush(hw); } @@ -2364,11 +2365,11 @@ static int __i40e_set_coalesce(struct net_device *netdev, vsi->work_limit = ec->tx_max_coalesced_frames_irq; if (queue < 0) { - cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[0]->itr_setting; + cur_tx_itr = vsi->tx_rings[0]->itr_setting; } else if (queue < vsi->num_queue_pairs) { - cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[queue]->itr_setting; + cur_tx_itr = vsi->tx_rings[queue]->itr_setting; } else { netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", vsi->num_queue_pairs - 1); @@ -2396,7 +2397,7 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->rx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -2407,16 +2408,16 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->tx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } if (ec->use_adaptive_rx_coalesce && !cur_rx_itr) - ec->rx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->rx_coalesce_usecs = I40E_MIN_ITR; if (ec->use_adaptive_tx_coalesce && !cur_tx_itr) - ec->tx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->tx_coalesce_usecs = I40E_MIN_ITR; intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high); vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg); @@ -4406,6 +4407,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } flags_complete: + changed_flags = orig_flags ^ new_flags; + /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are 
supported and safe. */ @@ -4415,21 +4418,17 @@ flags_complete: !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) return -EOPNOTSUPP; - /* Disable FW LLDP not supported if NPAR active or if FW - * API version < 1.7 + /* If the driver detected FW LLDP was disabled on init, this flag could + * be set, however we do not support _changing_ the flag if NPAR is + * enabled or FW API version < 1.7. There are situations where older + * FW versions/NPAR enabled PFs could disable LLDP, however we _must_ + * not allow the user to enable/disable LLDP with this flag on + * unsupported FW versions. */ - if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (pf->hw.func_caps.npar_enable) { - dev_warn(&pf->pdev->dev, - "Unable to stop FW LLDP if NPAR active\n"); - return -EOPNOTSUPP; - } - - if (pf->hw.aq.api_maj_ver < 1 || - (pf->hw.aq.api_maj_ver == 1 && - pf->hw.aq.api_min_ver < 7)) { + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (!(pf->hw_features & I40E_HW_STOPPABLE_FW_LLDP)) { dev_warn(&pf->pdev->dev, - "FW ver does not support stopping FW LLDP\n"); + "Device does not support changing FW LLDP\n"); return -EOPNOTSUPP; } } @@ -4439,6 +4438,10 @@ flags_complete: * something else has modified the flags variable since we copied it * originally. We'll just punt with an error and log something in the * message buffer. + * + * This is the point of no return for this function. We need to have + * checked any discrepancies or misconfigurations and returned + * EOPNOTSUPP before updating pf->flags here. */ if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { dev_warn(&pf->pdev->dev, @@ -4446,8 +4449,6 @@ flags_complete: return -EAGAIN; } - changed_flags = orig_flags ^ new_flags; - /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. 
@@ -4479,6 +4480,12 @@ flags_complete: } } + if ((changed_flags & pf->flags & + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && + (pf->flags & I40E_FLAG_MFP_ENABLED)) + dev_warn(&pf->pdev->dev, + "Turning on link-down-on-close flag may affect other partitions\n"); + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { struct i40e_dcbx_config *dcbcfg; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e31adbc75f9c..f6d37456f3b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -69,12 +69,6 @@ static int i40e_reset(struct i40e_pf *pf); static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); -static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add); -static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add); static int i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type); @@ -215,8 +209,8 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) { dev_info(&pf->pdev->dev, - "param err: pile=%p needed=%d id=0x%04x\n", - pile, needed, id); + "param err: pile=%s needed=%d id=0x%04x\n", + pile ? "<valid>" : "<null>", needed, id); return -EINVAL; } @@ -1380,14 +1374,7 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, ether_addr_copy(f->macaddr, macaddr); f->vlan = vlan; - /* If we're in overflow promisc mode, set the state directly - * to failed, so we don't bother to try sending the filter - * to the hardware. 
- */ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state)) - f->state = I40E_FILTER_FAILED; - else - f->state = I40E_FILTER_NEW; + f->state = I40E_FILTER_NEW; INIT_HLIST_NODE(&f->hlist); key = i40e_addr_to_hkey(macaddr); @@ -2116,17 +2103,16 @@ void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name, * @list: the list of filters to send to firmware * @add_head: Position in the add hlist * @num_add: the number of filters to add - * @promisc_change: set to true on exit if promiscuous mode was forced on * * Send a request to firmware via AdminQ to add a chunk of filters. Will set - * promisc_changed to true if the firmware has run out of space for more - * filters. + * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of + * space for more filters. */ static void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, struct i40e_new_mac_filter *add_head, - int num_add, bool *promisc_changed) + int num_add) { struct i40e_hw *hw = &vsi->back->hw; int aq_err, fcnt; @@ -2136,7 +2122,6 @@ void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name, fcnt = i40e_update_filter_state(num_add, list, add_head); if (fcnt != num_add) { - *promisc_changed = true; set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n", @@ -2177,11 +2162,13 @@ i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name, NULL); } - if (aq_ret) + if (aq_ret) { + set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); dev_warn(&vsi->back->pdev->dev, - "Error %s setting broadcast promiscuous mode on %s\n", + "Error %s, forcing overflow promiscuous on %s\n", i40e_aq_str(hw, hw->aq.asq_last_status), vsi_name); + } return aq_ret; } @@ -2267,9 +2254,9 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) struct i40e_mac_filter *f; struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = 
&vsi->back->hw; + bool old_overflow, new_overflow; unsigned int failed_filters = 0; unsigned int vlan_filters = 0; - bool promisc_changed = false; char vsi_name[16] = "PF"; int filter_list_len = 0; i40e_status aq_ret = 0; @@ -2291,6 +2278,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) usleep_range(1000, 2000); pf = vsi->back; + old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + if (vsi->netdev) { changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags; vsi->current_netdev_flags = vsi->netdev->flags; @@ -2423,12 +2412,6 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) num_add = 0; hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)) { - new->state = I40E_FILTER_FAILED; - continue; - } - /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter. */ @@ -2464,15 +2447,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) /* flush a full buffer */ if (num_add == filter_list_len) { i40e_aqc_add_filters(vsi, vsi_name, add_list, - add_head, num_add, - &promisc_changed); + add_head, num_add); memset(add_list, 0, list_size); num_add = 0; } } if (num_add) { i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head, - num_add, &promisc_changed); + num_add); } /* Now move all of the filters from the temp add list back to * the VSI's list. @@ -2501,24 +2483,16 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } spin_unlock_bh(&vsi->mac_filter_hash_lock); - /* If promiscuous mode has changed, we need to calculate a new - * threshold for when we are safe to exit - */ - if (promisc_changed) - vsi->promisc_threshold = (vsi->active_filters * 3) / 4; - /* Check if we are able to exit overflow promiscuous mode. We can * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value. 
*/ - if (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) && - !promisc_changed && !failed_filters && - (vsi->active_filters < vsi->promisc_threshold)) { + if (old_overflow && !failed_filters && + vsi->active_filters < vsi->promisc_threshold) { dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n", vsi_name); clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); - promisc_changed = true; vsi->promisc_threshold = 0; } @@ -2528,6 +2502,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) goto out; } + new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); + + /* If we are entering overflow promiscuous, we need to calculate a new + * threshold for when we are safe to exit + */ + if (!old_overflow && new_overflow) + vsi->promisc_threshold = (vsi->active_filters * 3) / 4; + /* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc; @@ -2548,12 +2530,11 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } } - if ((changed_flags & IFF_PROMISC) || promisc_changed) { + if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) { bool cur_promisc; cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) || - test_bit(__I40E_VSI_OVERFLOW_PROMISC, - vsi->state)); + new_overflow); aq_ret = i40e_set_promiscuous(pf, cur_promisc); if (aq_ret) { retval = i40e_aq_rc_to_posix(aq_ret, @@ -3449,15 +3430,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = + ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), - q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); - 
q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = + ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), - q_vector->tx.itr); + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); @@ -3558,13 +3544,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) u32 val; /* set the ITR configuration */ - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); @@ -5375,7 +5362,7 @@ out: * @vsi: VSI to be configured * **/ -int i40e_get_link_speed(struct i40e_vsi *vsi) +static int i40e_get_link_speed(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; @@ -6848,8 +6835,8 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter, * Add or delete a cloud filter for a specific flow spec. * Returns 0 if the filter were successfully added. 
**/ -static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, bool add) +int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, bool add) { struct i40e_aqc_cloud_filters_element_data cld_filter; struct i40e_pf *pf = vsi->back; @@ -6915,9 +6902,9 @@ static int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, * Add or delete a cloud filter for a specific flow spec using big buffer. * Returns 0 if the filter were successfully added. **/ -static int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, - struct i40e_cloud_filter *filter, - bool add) +int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, + struct i40e_cloud_filter *filter, + bool add) { struct i40e_aqc_cloud_filters_element_bb cld_filter; struct i40e_pf *pf = vsi->back; @@ -9215,6 +9202,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && + ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || + hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { + /* The following delay is necessary for 4.33 firmware and older + * to recover after EMP reset. 200 ms should suffice but we + * put here 300 ms to be sure that FW is ready to operate + * after reset. 
+ */ + mdelay(300); + } + /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); @@ -9937,18 +9935,17 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) mutex_lock(&pf->switch_mutex); if (!pf->vsi[vsi->idx]) { - dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n", - vsi->idx, vsi->idx, vsi, vsi->type); + dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n", + vsi->idx, vsi->idx, vsi->type); goto unlock_vsi; } if (pf->vsi[vsi->idx] != vsi) { dev_err(&pf->pdev->dev, - "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n", + "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n", pf->vsi[vsi->idx]->idx, - pf->vsi[vsi->idx], pf->vsi[vsi->idx]->type, - vsi->idx, vsi, vsi->type); + vsi->idx, vsi->type); goto unlock_vsi; } @@ -10018,7 +10015,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = ring++; if (!i40e_enabled_xdp_vsi(vsi)) @@ -10036,7 +10033,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->xdp_rings[i] = ring++; setup_rx: @@ -10049,7 +10046,7 @@ setup_rx: ring->count = vsi->num_desc; ring->size = 0; ring->dcb_tc = 0; - ring->rx_itr_setting = pf->rx_itr_default; + ring->itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = ring; } @@ -10328,9 +10325,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll, NAPI_POLL_WEIGHT); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.latency_range = 
I40E_LOW_LATENCY; - /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; @@ -11089,6 +11083,16 @@ static int i40e_sw_init(struct i40e_pf *pf) /* IWARP needs one extra vector for CQP just like MISC.*/ pf->num_iwarp_msix = (int)num_online_cpus() + 1; } + /* Stopping the FW LLDP engine is only supported on the + * XL710 with a FW ver >= 1.7. Also, stopping FW LLDP + * engine is not supported if NPAR is functioning on this + * part + */ + if (pf->hw.mac.type == I40E_MAC_XL710 && + !pf->hw.func_caps.npar_enable && + (pf->hw.aq.api_maj_ver > 1 || + (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver > 6))) + pf->hw_features |= I40E_HW_STOPPABLE_FW_LLDP; #ifdef CONFIG_PCI_IOV if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e554aa6cf070..1ec9b1d8023d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -995,99 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) } } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->vsi->back->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure 
containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? 
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. 
+ */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. 
+ */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. 
*/ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. 
+ */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1991,7 +2133,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -2274,29 +2416,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. 
+ */ + itr &= I40E_ITR_MASK; + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); + (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->rx_rings[idx]->rx_itr_setting; -} -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->tx_rings[idx]->tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -2308,10 +2466,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; + u32 intval; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2319,65 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, return; } - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); - - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if 
(ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } - - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. 
*/ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 701b708628b0..f75a8fe68fcf 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ 
b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -30,32 +30,37 @@ #include <net/xdp.h> /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. 
+ */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) + /** * i40e_intrl_usec_to_reg - convert interrupt rate limit to register * @intrl: interrupt rate limit to convert @@ -382,8 +387,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. 
*/ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring) ring->flags |= I40E_TXR_FLAGS_XDP; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index cd294e6a8587..b0eed8c0b2f2 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -39,7 +39,7 @@ #define I40E_MASK(mask, shift) ((u32)(mask) << (shift)) #define I40E_MAX_VSI_QP 16 -#define I40E_MAX_VF_VSI 3 +#define I40E_MAX_VF_VSI 4 #define I40E_MAX_CHAINED_RX_BUFFERS 5 #define I40E_MAX_PF_UDP_OFFLOAD_PORTS 16 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index e9309fb9084b..5cca083da93c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ 
b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -258,6 +258,38 @@ static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id, } /** + * i40e_get_real_pf_qid + * @vf: pointer to the VF info + * @vsi_id: vsi id + * @queue_id: queue number + * + * wrapper function to get pf_queue_id handling ADq code as well + **/ +static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id) +{ + int i; + + if (vf->adq_enabled) { + /* Although VF considers all the queues(can be 1 to 16) as its + * own but they may actually belong to different VSIs(up to 4). + * We need to find which queues belongs to which VSI. + */ + for (i = 0; i < vf->num_tc; i++) { + if (queue_id < vf->ch[i].num_qps) { + vsi_id = vf->ch[i].vsi_id; + break; + } + /* find right queue id which is relative to a + * given VSI. + */ + queue_id -= vf->ch[i].num_qps; + } + } + + return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id); +} + +/** * i40e_config_irq_link_list * @vf: pointer to the VF info * @vsi_id: id of VSI as given by the FW @@ -310,7 +342,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); + pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id); reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id); wr32(hw, reg_idx, reg); @@ -333,8 +365,9 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, if (next_q < size) { vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES; qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, - vsi_queue_id); + pf_queue_id = i40e_get_real_pf_qid(vf, + vsi_id, + vsi_queue_id); } else { pf_queue_id = I40E_QUEUE_END_OF_LIST; qtype = 0; @@ -669,18 +702,20 @@ error_param: /** * i40e_alloc_vsi_res * @vf: pointer to the VF info - * @type: type of VSI to allocate + * @idx: VSI 
index, applies only for ADq mode, zero otherwise * * alloc VF vsi context & resources **/ -static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) +static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx) { struct i40e_mac_filter *f = NULL; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi; + u64 max_tx_rate = 0; int ret = 0; - vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id); + vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid, + vf->vf_id); if (!vsi) { dev_err(&pf->pdev->dev, @@ -689,7 +724,8 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) ret = -ENOENT; goto error_alloc_vsi_res; } - if (type == I40E_VSI_SRIOV) { + + if (!idx) { u64 hena = i40e_pf_get_default_rss_hena(pf); u8 broadcast[ETH_ALEN]; @@ -721,17 +757,29 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type) spin_unlock_bh(&vsi->mac_filter_hash_lock); wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena); wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32)); + /* program mac filter only for VF VSI */ + ret = i40e_sync_vsi_filters(vsi); + if (ret) + dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); } - /* program mac filter */ - ret = i40e_sync_vsi_filters(vsi); - if (ret) - dev_err(&pf->pdev->dev, "Unable to program ucast filters\n"); + /* storing VSI index and id for ADq and don't apply the mac filter */ + if (vf->adq_enabled) { + vf->ch[idx].vsi_idx = vsi->idx; + vf->ch[idx].vsi_id = vsi->id; + } /* Set VF bandwidth if specified */ if (vf->tx_rate) { + max_tx_rate = vf->tx_rate; + } else if (vf->ch[idx].max_tx_rate) { + max_tx_rate = vf->ch[idx].max_tx_rate; + } + + if (max_tx_rate) { + max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR); ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, - vf->tx_rate / 50, 0, NULL); + max_tx_rate, 0, NULL); if (ret) dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n", vf->vf_id, ret); @@ -742,6 
+790,92 @@ error_alloc_vsi_res: } /** + * i40e_map_pf_queues_to_vsi + * @vf: pointer to the VF info + * + * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This + * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI. + **/ +static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + u32 reg, num_tc = 1; /* VF has at least one traffic class */ + u16 vsi_id, qps; + int i, j; + + if (vf->adq_enabled) + num_tc = vf->num_tc; + + for (i = 0; i < num_tc; i++) { + if (vf->adq_enabled) { + qps = vf->ch[i].num_qps; + vsi_id = vf->ch[i].vsi_id; + } else { + qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + vsi_id = vf->lan_vsi_id; + } + + for (j = 0; j < 7; j++) { + if (j * 2 >= qps) { + /* end of list */ + reg = 0x07FF07FF; + } else { + u16 qid = i40e_vc_get_pf_queue_id(vf, + vsi_id, + j * 2); + reg = qid; + qid = i40e_vc_get_pf_queue_id(vf, vsi_id, + (j * 2) + 1); + reg |= qid << 16; + } + i40e_write_rx_ctl(hw, + I40E_VSILAN_QTABLE(j, vsi_id), + reg); + } + } +} + +/** + * i40e_map_pf_to_vf_queues + * @vf: pointer to the VF info + * + * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This + * function takes care of the second part VPLAN_QTABLE & completes VF mappings. 
+ **/ +static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = &pf->hw; + u32 reg, total_qps = 0; + u32 qps, num_tc = 1; /* VF has at least one traffic class */ + u16 vsi_id, qid; + int i, j; + + if (vf->adq_enabled) + num_tc = vf->num_tc; + + for (i = 0; i < num_tc; i++) { + if (vf->adq_enabled) { + qps = vf->ch[i].num_qps; + vsi_id = vf->ch[i].vsi_id; + } else { + qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + vsi_id = vf->lan_vsi_id; + } + + for (j = 0; j < qps; j++) { + qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j); + + reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK); + wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id), + reg); + total_qps++; + } + } +} + +/** * i40e_enable_vf_mappings * @vf: pointer to the VF info * @@ -751,8 +885,7 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) { struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; - u32 reg, total_queue_pairs = 0; - int j; + u32 reg; /* Tell the hardware we're using noncontiguous mapping. 
HW requires * that VF queues be mapped using this method, even when they are @@ -765,30 +898,8 @@ static void i40e_enable_vf_mappings(struct i40e_vf *vf) reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK; wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg); - /* map PF queues to VF queues */ - for (j = 0; j < pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; j++) { - u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, j); - - reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK); - wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg); - total_queue_pairs++; - } - - /* map PF queues to VSI */ - for (j = 0; j < 7; j++) { - if (j * 2 >= pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs) { - reg = 0x07FF07FF; /* unused */ - } else { - u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, - j * 2); - reg = qid; - qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_id, - (j * 2) + 1); - reg |= qid << 16; - } - i40e_write_rx_ctl(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), - reg); - } + i40e_map_pf_to_vf_queues(vf); + i40e_map_pf_queues_to_vsi(vf); i40e_flush(hw); } @@ -824,7 +935,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf) struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; u32 reg_idx, reg; - int i, msix_vf; + int i, j, msix_vf; /* Start by disabling VF's configuration API to prevent the OS from * accessing the VF's VSI after it's freed / invalidated. 
@@ -846,6 +957,20 @@ static void i40e_free_vf_res(struct i40e_vf *vf) vf->lan_vsi_id = 0; vf->num_mac = 0; } + + /* do the accounting and remove additional ADq VSI's */ + if (vf->adq_enabled && vf->ch[0].vsi_idx) { + for (j = 0; j < vf->num_tc; j++) { + /* At this point VSI0 is already released so don't + * release it again and only clear their values in + * structure variables + */ + if (j) + i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]); + vf->ch[j].vsi_idx = 0; + vf->ch[j].vsi_id = 0; + } + } msix_vf = pf->hw.func_caps.num_msix_vectors_vf; /* disable interrupts so the VF starts in a known state */ @@ -891,7 +1016,7 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) { struct i40e_pf *pf = vf->pf; int total_queue_pairs = 0; - int ret; + int ret, idx; if (vf->num_req_queues && vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF) @@ -900,11 +1025,30 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf) pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF; /* allocate hw vsi context & associated resources */ - ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV); + ret = i40e_alloc_vsi_res(vf, 0); if (ret) goto error_alloc; total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs; + /* allocate additional VSIs based on tc information for ADq */ + if (vf->adq_enabled) { + if (pf->queues_left >= + (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) { + /* TC 0 always belongs to VF VSI */ + for (idx = 1; idx < vf->num_tc; idx++) { + ret = i40e_alloc_vsi_res(vf, idx); + if (ret) + goto error_alloc; + } + /* send correct number of queues */ + total_queue_pairs = I40E_MAX_VF_QUEUES; + } else { + dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n", + vf->vf_id); + vf->adq_enabled = false; + } + } + /* We account for each VF to get a default number of queue pairs. 
If * the VF has now requested more, we need to account for that to make * certain we never request more queues than we actually have left in @@ -1537,6 +1681,27 @@ static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg) } /** + * i40e_del_qch - delete all the additional VSIs created as a part of ADq + * @vf: pointer to VF structure + **/ +static void i40e_del_qch(struct i40e_vf *vf) +{ + struct i40e_pf *pf = vf->pf; + int i; + + /* first element in the array belongs to primary VF VSI and we shouldn't + * delete it. We should however delete the rest of the VSIs created + */ + for (i = 1; i < vf->num_tc; i++) { + if (vf->ch[i].vsi_idx) { + i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]); + vf->ch[i].vsi_idx = 0; + vf->ch[i].vsi_id = 0; + } + } +} + +/** * i40e_vc_get_vf_resources_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1631,6 +1796,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ; + vfres->num_vsis = num_vsis; vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; @@ -1855,27 +2023,37 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; struct i40e_pf *pf = vf->pf; - u16 vsi_id, vsi_queue_id; + u16 vsi_id, vsi_queue_id = 0; i40e_status aq_ret = 0; - int i; + int i, j = 0, idx = 0; + + vsi_id = qci->vsi_id; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto error_param; } - vsi_id = qci->vsi_id; if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } + for (i = 0; i < qci->num_queue_pairs; i++) { qpi = &qci->qpair[i]; - vsi_queue_id = qpi->txq.queue_id; - if ((qpi->txq.vsi_id != 
vsi_id) || - (qpi->rxq.vsi_id != vsi_id) || - (qpi->rxq.queue_id != vsi_queue_id) || - !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { + + if (!vf->adq_enabled) { + vsi_queue_id = qpi->txq.queue_id; + + if (qpi->txq.vsi_id != qci->vsi_id || + qpi->rxq.vsi_id != qci->vsi_id || + qpi->rxq.queue_id != vsi_queue_id) { + aq_ret = I40E_ERR_PARAM; + goto error_param; + } + } + + if (!i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -1887,9 +2065,33 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) aq_ret = I40E_ERR_PARAM; goto error_param; } + + /* For ADq there can be up to 4 VSIs with max 4 queues each. + * VF does not know about these additional VSIs and all + * it cares is about its own queues. PF configures these queues + * to its appropriate VSIs based on TC mapping + **/ + if (vf->adq_enabled) { + if (j == (vf->ch[idx].num_qps - 1)) { + idx++; + j = 0; /* resetting the queue count */ + vsi_queue_id = 0; + } else { + j++; + vsi_queue_id++; + } + vsi_id = vf->ch[idx].vsi_id; + } } /* set vsi num_queue_pairs in use to num configured by VF */ - pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; + if (!vf->adq_enabled) { + pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = + qci->num_queue_pairs; + } else { + for (i = 0; i < vf->num_tc; i++) + pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs = + vf->ch[i].num_qps; + } error_param: /* send the response to the VF */ @@ -1898,6 +2100,33 @@ error_param: } /** + * i40e_validate_queue_map + * @vsi_id: vsi id + * @queuemap: Tx or Rx queue map + * + * check if Tx or Rx queue map is valid + **/ +static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id, + unsigned long queuemap) +{ + u16 vsi_queue_id, queue_id; + + for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) { + if (vf->adq_enabled) { + vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id; + queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF); + } else { + 
queue_id = vsi_queue_id; + } + + if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) + return -EINVAL; + } + + return 0; +} + +/** * i40e_vc_config_irq_map_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1911,9 +2140,8 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct virtchnl_irq_map_info *irqmap_info = (struct virtchnl_irq_map_info *)msg; struct virtchnl_vector_map *map; - u16 vsi_id, vsi_queue_id, vector_id; + u16 vsi_id, vector_id; i40e_status aq_ret = 0; - unsigned long tempmap; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { @@ -1923,7 +2151,6 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) for (i = 0; i < irqmap_info->num_vectors; i++) { map = &irqmap_info->vecmap[i]; - vector_id = map->vector_id; vsi_id = map->vsi_id; /* validate msg params */ @@ -1933,23 +2160,14 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) goto error_param; } - /* lookout for the invalid queue index */ - tempmap = map->rxq_map; - for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { - if (!i40e_vc_isvalid_queue_id(vf, vsi_id, - vsi_queue_id)) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } + if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; } - tempmap = map->txq_map; - for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) { - if (!i40e_vc_isvalid_queue_id(vf, vsi_id, - vsi_queue_id)) { - aq_ret = I40E_ERR_PARAM; - goto error_param; - } + if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) { + aq_ret = I40E_ERR_PARAM; + goto error_param; } i40e_config_irq_link_list(vf, vsi_id, map); @@ -1975,6 +2193,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) struct i40e_pf *pf = vf->pf; u16 vsi_id = vqs->vsi_id; i40e_status aq_ret = 0; + int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; @@ -1993,6 +2212,16 @@ 
static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx])) aq_ret = I40E_ERR_TIMEOUT; + + /* need to start the rings for additional ADq VSI's as well */ + if (vf->adq_enabled) { + /* zero belongs to LAN VSI */ + for (i = 1; i < vf->num_tc; i++) { + if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx])) + aq_ret = I40E_ERR_TIMEOUT; + } + } + error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, @@ -2688,6 +2917,618 @@ err: } /** + * i40e_validate_cloud_filter + * @vf: pointer to the VF info + * @tc_filter: pointer to the TC filter requested by the VF + * + * This function validates cloud filter programmed as TC filter for ADq + **/ +static int i40e_validate_cloud_filter(struct i40e_vf *vf, + struct virtchnl_filter *tc_filter) +{ + struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec; + struct virtchnl_l4_spec data = tc_filter->data.tcp_spec; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct i40e_mac_filter *f; + struct hlist_node *h; + bool found = false; + int bkt; + + if (!tc_filter->action) { + dev_info(&pf->pdev->dev, + "VF %d: Currently ADq doesn't support Drop Action\n", + vf->vf_id); + goto err; + } + + /* action_meta is TC number here to which the filter is applied. + * It is used as an index into vf->ch[], so it must be below the + * number of TCs configured for this VF. + */ + if (!tc_filter->action_meta || + tc_filter->action_meta >= vf->num_tc) { + dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n", + vf->vf_id, tc_filter->action_meta); + goto err; + } + + /* Check filter if it's programmed for advanced mode or basic mode. + * There are two ADq modes (for VF only), + * 1. Basic mode: intended to allow as many filter options as possible + * to be added to a VF in Non-trusted mode. Main goal is + * to add filters to its own MAC and VLAN id. + * 2. Advanced mode: is for allowing filters to be applied other than + * its own MAC or VLAN. This mode requires the VF to be + * Trusted.
+ */ + if (mask.dst_mac[0] && !mask.dst_ip[0]) { + vsi = pf->vsi[vf->lan_vsi_idx]; + f = i40e_find_mac(vsi, data.dst_mac); + + if (!f) { + dev_info(&pf->pdev->dev, + "Destination MAC %pM doesn't belong to VF %d\n", + data.dst_mac, vf->vf_id); + goto err; + } + + if (mask.vlan_id) { + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, + hlist) { + if (f->vlan == ntohs(data.vlan_id)) { + found = true; + break; + } + } + if (!found) { + dev_info(&pf->pdev->dev, + "VF %d doesn't have any VLAN id %u\n", + vf->vf_id, ntohs(data.vlan_id)); + goto err; + } + } + } else { + /* Check if VF is trusted */ + if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(&pf->pdev->dev, + "VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n", + vf->vf_id); + return I40E_ERR_CONFIG; + } + } + + if (mask.dst_mac[0] & data.dst_mac[0]) { + if (is_broadcast_ether_addr(data.dst_mac) || + is_zero_ether_addr(data.dst_mac)) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n", + vf->vf_id, data.dst_mac); + goto err; + } + } + + if (mask.src_mac[0] & data.src_mac[0]) { + if (is_broadcast_ether_addr(data.src_mac) || + is_zero_ether_addr(data.src_mac)) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n", + vf->vf_id, data.src_mac); + goto err; + } + } + + if (mask.dst_port & data.dst_port) { + if (!data.dst_port || be16_to_cpu(data.dst_port) > 0xFFFF) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n", + vf->vf_id); + goto err; + } + } + + if (mask.src_port & data.src_port) { + if (!data.src_port || be16_to_cpu(data.src_port) > 0xFFFF) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n", + vf->vf_id); + goto err; + } + } + + if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW && + tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) { + dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n", + vf->vf_id); + goto err; + } + + if (mask.vlan_id & data.vlan_id) { + if (ntohs(data.vlan_id) > I40E_MAX_VLANID) { + 
dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n", + vf->vf_id); + goto err; + } + } + + return I40E_SUCCESS; +err: + return I40E_ERR_CONFIG; +} + +/** + * i40e_find_vsi_from_seid - searches for the vsi with the given seid + * @vf: pointer to the VF info + * @seid: seid of the vsi it is searching for + * + * Walks the VSI ids recorded in vf->ch[] for each configured TC and returns + * the first VSI whose seid matches, or NULL when there is no match. + **/ +static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid) +{ + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + int i; + + for (i = 0; i < vf->num_tc; i++) { + vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id); + /* the lookup may fail, e.g. for a channel whose VSI is gone */ + if (vsi && vsi->seid == seid) + return vsi; + } + return NULL; +} + +/** + * i40e_del_all_cloud_filters + * @vf: pointer to the VF info + * + * This function deletes all cloud filters + **/ +static void i40e_del_all_cloud_filters(struct i40e_vf *vf) +{ + struct i40e_cloud_filter *cfilter = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct hlist_node *node; + int ret; + + hlist_for_each_entry_safe(cfilter, node, + &vf->cloud_filter_list, cloud_node) { + vsi = i40e_find_vsi_from_seid(vf, cfilter->seid); + + if (!vsi) { + dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n", + vf->vf_id, cfilter->seid); + continue; + } + + if (cfilter->dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, + false); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, false); + if (ret) + dev_err(&pf->pdev->dev, + "VF %d: Failed to delete cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, + pf->hw.aq.asq_last_status)); + + hlist_del(&cfilter->cloud_node); + kfree(cfilter); + vf->num_cloud_filters--; + } +} + +/** + * i40e_vc_del_cloud_filter + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * This function deletes a cloud filter programmed as TC filter for ADq + **/ +static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_filter *vcf = (struct virtchnl_filter
*)msg; + struct virtchnl_l4_spec mask = vcf->mask.tcp_spec; + struct virtchnl_l4_spec tcf = vcf->data.tcp_spec; + struct i40e_cloud_filter cfilter, *cf = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + struct hlist_node *node; + i40e_status aq_ret = 0; + int i, ret; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!vf->adq_enabled) { + dev_info(&pf->pdev->dev, + "VF %d: ADq not enabled, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (i40e_validate_cloud_filter(vf, vcf)) { + dev_info(&pf->pdev->dev, + "VF %d: Invalid input, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + memset(&cfilter, 0, sizeof(cfilter)); + /* parse destination mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i]; + + /* parse source mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i]; + + cfilter.vlan_id = mask.vlan_id & tcf.vlan_id; + cfilter.dst_port = mask.dst_port & tcf.dst_port; + cfilter.src_port = mask.src_port & tcf.src_port; + + switch (vcf->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + cfilter.n_proto = ETH_P_IP; + if (mask.dst_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip, + ARRAY_SIZE(tcf.dst_ip)); + else if (mask.src_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip, + ARRAY_SIZE(tcf.dst_ip)); + break; + case VIRTCHNL_TCP_V6_FLOW: + cfilter.n_proto = ETH_P_IPV6; + if (mask.dst_ip[3] & tcf.dst_ip[3]) + memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip, + sizeof(cfilter.ip.v6.dst_ip6)); + if (mask.src_ip[3] & tcf.src_ip[3]) + memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip, + sizeof(cfilter.ip.v6.src_ip6)); + break; + default: + /* TC filter can be configured based on different combinations + * and in this case IP is not a part of filter config + */ + dev_info(&pf->pdev->dev, "VF %d: Flow type 
not configured\n", + vf->vf_id); + } + + /* get the vsi to which the tc belongs to */ + vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx]; + cfilter.seid = vsi->seid; + cfilter.flags = vcf->field_flags; + + /* Deleting TC filter */ + if (tcf.dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false); + else + ret = i40e_add_del_cloud_filter(vsi, &cfilter, false); + if (ret) { + dev_err(&pf->pdev->dev, + "VF %d: Failed to delete cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + goto err; + } + + hlist_for_each_entry_safe(cf, node, + &vf->cloud_filter_list, cloud_node) { + if (cf->seid != cfilter.seid) + continue; + if (mask.dst_port) + if (cfilter.dst_port != cf->dst_port) + continue; + if (mask.dst_mac[0]) + if (!ether_addr_equal(cf->src_mac, cfilter.src_mac)) + continue; + /* for ipv4 data to be valid, only first byte of mask is set */ + if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0]) + if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip, + ARRAY_SIZE(tcf.dst_ip))) + continue; + /* for ipv6, mask is set for all sixteen bytes (4 words) */ + if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3]) + if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6, + sizeof(cfilter.ip.v6.src_ip6))) + continue; + if (mask.vlan_id) + if (cfilter.vlan_id != cf->vlan_id) + continue; + + hlist_del(&cf->cloud_node); + kfree(cf); + vf->num_cloud_filters--; + } + +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER, + aq_ret); +} + +/** + * i40e_vc_add_cloud_filter + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * This function adds a cloud filter programmed as TC filter for ADq + **/ +static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg; + struct virtchnl_l4_spec mask = vcf->mask.tcp_spec; + struct virtchnl_l4_spec tcf = vcf->data.tcp_spec; + struct i40e_cloud_filter 
*cfilter = NULL; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = NULL; + i40e_status aq_ret = 0; + int i, ret; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!vf->adq_enabled) { + dev_info(&pf->pdev->dev, + "VF %d: ADq is not enabled, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (i40e_validate_cloud_filter(vf, vcf)) { + dev_info(&pf->pdev->dev, + "VF %d: Invalid input/s, can't apply cloud filter\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); + if (!cfilter) + return -ENOMEM; + + /* parse destination mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i]; + + /* parse source mac address */ + for (i = 0; i < ETH_ALEN; i++) + cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i]; + + cfilter->vlan_id = mask.vlan_id & tcf.vlan_id; + cfilter->dst_port = mask.dst_port & tcf.dst_port; + cfilter->src_port = mask.src_port & tcf.src_port; + + switch (vcf->flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + cfilter->n_proto = ETH_P_IP; + if (mask.dst_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip, + ARRAY_SIZE(tcf.dst_ip)); + else if (mask.src_ip[0] & tcf.dst_ip[0]) + memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip, + ARRAY_SIZE(tcf.dst_ip)); + break; + case VIRTCHNL_TCP_V6_FLOW: + cfilter->n_proto = ETH_P_IPV6; + if (mask.dst_ip[3] & tcf.dst_ip[3]) + memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip, + sizeof(cfilter->ip.v6.dst_ip6)); + if (mask.src_ip[3] & tcf.src_ip[3]) + memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip, + sizeof(cfilter->ip.v6.src_ip6)); + break; + default: + /* TC filter can be configured based on different combinations + * and in this case IP is not a part of filter config + */ + dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n", + vf->vf_id); + } + + /* get the VSI to which the TC belongs to */ + vsi = 
pf->vsi[vf->ch[vcf->action_meta].vsi_idx]; + cfilter->seid = vsi->seid; + cfilter->flags = vcf->field_flags; + + /* Adding cloud filter programmed as TC filter */ + if (tcf.dst_port) + ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); + else + ret = i40e_add_del_cloud_filter(vsi, cfilter, true); + if (ret) { + dev_err(&pf->pdev->dev, + "VF %d: Failed to add cloud filter, err %s aq_err %s\n", + vf->vf_id, i40e_stat_str(&pf->hw, ret), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + /* filter was never linked into the VF list, free it here */ + kfree(cfilter); + /* NOTE(review): aq_ret is still 0 on this path, so the VF is + * sent a success response - confirm whether an error status + * should be reported instead. + */ + goto err; + } + + INIT_HLIST_NODE(&cfilter->cloud_node); + hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list); + vf->num_cloud_filters++; +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER, + aq_ret); +} + +/** + * i40e_vc_add_qch_msg - Add queue channel and enable ADq + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + **/ +static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) +{ + struct virtchnl_tc_info *tci = + (struct virtchnl_tc_info *)msg; + struct i40e_pf *pf = vf->pf; + struct i40e_link_status *ls = &pf->hw.phy.link_info; + int i, adq_request_qps = 0, speed = 0; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* ADq cannot be applied if spoof check is ON */ + if (vf->spoofchk) { + dev_err(&pf->pdev->dev, + "Spoof check is ON, turn it OFF to enable ADq\n"); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) { + dev_err(&pf->pdev->dev, + "VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* max number of traffic classes for VF currently capped at 4 */ + if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) { + dev_err(&pf->pdev->dev, + "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n", + vf->vf_id, tci->num_tc); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* validate queues for each
TC */ + for (i = 0; i < tci->num_tc; i++) + if (!tci->list[i].count || + tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) { + dev_err(&pf->pdev->dev, + "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n", + vf->vf_id, i, tci->list[i].count); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* need Max VF queues but already have default number of queues */ + adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF; + + if (pf->queues_left < adq_request_qps) { + dev_err(&pf->pdev->dev, + "No queues left to allocate to VF %d\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } else { + /* we need to allocate max VF queues to enable ADq so as to + * make sure ADq enabled VF always gets back queues when it + * goes through a reset. + */ + vf->num_queue_pairs = I40E_MAX_VF_QUEUES; + } + + /* get link speed in MB to validate rate limit */ + switch (ls->link_speed) { + case VIRTCHNL_LINK_SPEED_100MB: + speed = SPEED_100; + break; + case VIRTCHNL_LINK_SPEED_1GB: + speed = SPEED_1000; + break; + case VIRTCHNL_LINK_SPEED_10GB: + speed = SPEED_10000; + break; + case VIRTCHNL_LINK_SPEED_20GB: + speed = SPEED_20000; + break; + case VIRTCHNL_LINK_SPEED_25GB: + speed = SPEED_25000; + break; + case VIRTCHNL_LINK_SPEED_40GB: + speed = SPEED_40000; + break; + default: + dev_err(&pf->pdev->dev, + "Cannot detect link speed\n"); + aq_ret = I40E_ERR_PARAM; + goto err; + } + + /* parse data from the queue channel info */ + vf->num_tc = tci->num_tc; + for (i = 0; i < vf->num_tc; i++) { + if (tci->list[i].max_tx_rate) { + if (tci->list[i].max_tx_rate > speed) { + dev_err(&pf->pdev->dev, + "Invalid max tx rate %llu specified for VF %d.", + tci->list[i].max_tx_rate, + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + goto err; + } else { + vf->ch[i].max_tx_rate = + tci->list[i].max_tx_rate; + } + } + vf->ch[i].num_qps = tci->list[i].count; + } + + /* set this flag only after making sure all inputs are sane */ + vf->adq_enabled = true; + /* num_req_queues is set when 
user changes number of queues via ethtool + * and this causes issue for default VSI(which depends on this variable) + * when ADq is enabled, hence reset it. + */ + vf->num_req_queues = 0; + + /* reset the VF in order to allocate resources */ + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + + return I40E_SUCCESS; + + /* send the response to the VF */ +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS, + aq_ret); +} + +/** + * i40e_vc_del_qch_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + **/ +static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) +{ + struct i40e_pf *pf = vf->pf; + i40e_status aq_ret = 0; + + if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + + if (vf->adq_enabled) { + i40e_del_all_cloud_filters(vf); + i40e_del_qch(vf); + vf->adq_enabled = false; + vf->num_tc = 0; + dev_info(&pf->pdev->dev, + "Deleting Queue Channels and cloud filters for ADq on VF %d\n", + vf->vf_id); + } else { + dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n", + vf->vf_id); + aq_ret = I40E_ERR_PARAM; + } + + /* reset the VF in order to allocate resources */ + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + + return I40E_SUCCESS; + +err: + return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS, + aq_ret); +} + +/** * i40e_vc_process_vf_msg * @pf: pointer to the PF structure * @vf_id: source VF id @@ -2816,7 +3657,18 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, case VIRTCHNL_OP_REQUEST_QUEUES: ret = i40e_vc_request_queues_msg(vf, msg, msglen); break; - + case VIRTCHNL_OP_ENABLE_CHANNELS: + ret = i40e_vc_add_qch_msg(vf, msg); + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + ret = i40e_vc_del_qch_msg(vf, msg); + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: + ret = i40e_vc_add_cloud_filter(vf, msg); + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: + ret = i40e_vc_del_cloud_filter(vf, msg); 
+ break; case VIRTCHNL_OP_UNKNOWN: default: dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n", @@ -3382,6 +4234,16 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) i40e_vc_disable_vf(vf); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); + + if (vf->adq_enabled) { + if (!vf->trusted) { + dev_info(&pf->pdev->dev, + "VF %u no longer Trusted, deleting all cloud filters\n", + vf_id); + i40e_del_all_cloud_filters(vf); + } + } + out: return ret; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5efc4f92bb37..6852599b2379 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -69,6 +69,19 @@ enum i40e_vf_capabilities { I40E_VIRTCHNL_VF_CAP_IWARP, }; +/* In ADq, max 4 VSI's can be allocated per VF including primary VF VSI. + * These variables are used to store indices, id's and number of queues + * for each VSI including that of primary VF VSI. Each Traffic class is + * termed as channel and each channel can in-turn have 4 queues which + * means max 16 queues overall per VF. 
+ */ +struct i40evf_channel { + u16 vsi_idx; /* index in PF struct for all channel VSIs */ + u16 vsi_id; /* VSI ID used by firmware */ + u16 num_qps; /* number of queue pairs requested by user */ + u64 max_tx_rate; /* bandwidth rate allocation for VSIs */ +}; + /* VF information structure */ struct i40e_vf { struct i40e_pf *pf; @@ -111,6 +124,13 @@ struct i40e_vf { u16 num_mac; u16 num_vlan; + /* ADq related variables */ + bool adq_enabled; /* flag to enable adq */ + u8 num_tc; + struct i40evf_channel ch[I40E_MAX_VF_VSI]; + struct hlist_head cloud_filter_list; + u16 num_cloud_filters; + /* RDMA Client */ struct virtchnl_iwarp_qvlist_info *qvlist_info; }; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 357d6051281f..eb8f3e327f6b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) val); } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns 
true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; + + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? 
+ I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. 
+ */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. 
We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * given the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. 
*/ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. 
+ itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. 
+ */ + itr &= I40E_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | - (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); + (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - return adapter->rx_rings[idx].rx_itr_setting; -} - -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - - return adapter->tx_rings[idx].tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. 
+ */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; - - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); + u32 intval; - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } - - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. 
If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h 
b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 7798a6645c3f..9129447d079b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -28,31 +28,35 @@ #define _I40E_TXRX_H_ /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. 
+ */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0) #define I40E_INTRL_8K 125 /* 8000 ints/sec */ @@ -362,8 +366,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. 
*/ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 9690c1ea019e..e46555ad7122 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -52,7 +52,10 @@ #include <linux/socket.h> #include <linux/jiffies.h> #include <net/ip6_checksum.h> +#include <net/pkt_cls.h> #include <net/udp.h> +#include <net/tc_act/tc_gact.h> +#include <net/tc_act/tc_mirred.h> #include "i40e_type.h" #include <linux/avf/virtchnl.h> @@ -106,6 +109,7 @@ struct i40e_vsi { #define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4) #define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX 
+ 1) * 4) +#define I40EVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. @@ -117,9 +121,8 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; u32 ring_mask; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 or 1 update ITR */ u16 v_idx; /* index in the vsi->q_vector array. */ u16 reg_idx; /* register index of the interrupt */ char name[IFNAMSIZ + 15]; @@ -169,6 +172,28 @@ struct i40evf_vlan_filter { bool add; /* filter needs to be added */ }; +#define I40EVF_MAX_TRAFFIC_CLASS 4 +/* State of traffic class creation */ +enum i40evf_tc_state_t { + __I40EVF_TC_INVALID, /* no traffic class, default state */ + __I40EVF_TC_RUNNING, /* traffic classes have been created */ +}; + +/* channel info */ +struct i40evf_channel_config { + struct virtchnl_channel_info ch_info[I40EVF_MAX_TRAFFIC_CLASS]; + enum i40evf_tc_state_t state; + u8 total_qps; +}; + +/* State of cloud filter */ +enum i40evf_cloud_filter_state_t { + __I40EVF_CF_INVALID, /* cloud filter not added */ + __I40EVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */ + __I40EVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */ + __I40EVF_CF_ACTIVE, /* cloud filter is active */ +}; + /* Driver state. The order of these is important! 
*/ enum i40evf_state_t { __I40EVF_STARTUP, /* driver loaded, probe complete */ @@ -190,6 +215,36 @@ enum i40evf_critical_section_t { __I40EVF_IN_REMOVE_TASK, /* device being removed */ }; +#define I40EVF_CLOUD_FIELD_OMAC 0x01 +#define I40EVF_CLOUD_FIELD_IMAC 0x02 +#define I40EVF_CLOUD_FIELD_IVLAN 0x04 +#define I40EVF_CLOUD_FIELD_TEN_ID 0x08 +#define I40EVF_CLOUD_FIELD_IIP 0x10 + +#define I40EVF_CF_FLAGS_OMAC I40EVF_CLOUD_FIELD_OMAC +#define I40EVF_CF_FLAGS_IMAC I40EVF_CLOUD_FIELD_IMAC +#define I40EVF_CF_FLAGS_IMAC_IVLAN (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_IVLAN) +#define I40EVF_CF_FLAGS_IMAC_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_OMAC_TEN_ID_IMAC (I40EVF_CLOUD_FIELD_OMAC |\ + I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_IMAC_IVLAN_TEN_ID (I40EVF_CLOUD_FIELD_IMAC |\ + I40EVF_CLOUD_FIELD_IVLAN |\ + I40EVF_CLOUD_FIELD_TEN_ID) +#define I40EVF_CF_FLAGS_IIP I40E_CLOUD_FIELD_IIP + +/* bookkeeping of cloud filters */ +struct i40evf_cloud_filter { + enum i40evf_cloud_filter_state_t state; + struct list_head list; + struct virtchnl_filter f; + unsigned long cookie; + bool del; /* filter needs to be deleted */ + bool add; /* filter needs to be added */ +}; + /* board specific private data structure */ struct i40evf_adapter { struct timer_list watchdog_timer; @@ -241,6 +296,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_ALLMULTI_ON BIT(14) #define I40EVF_FLAG_LEGACY_RX BIT(15) #define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16) +#define I40EVF_FLAG_QUEUES_DISABLED BIT(17) /* duplicates for common code */ #define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED @@ -269,6 +325,10 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_RELEASE_ALLMULTI BIT(18) #define I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING BIT(19) #define I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING BIT(20) +#define I40EVF_FLAG_AQ_ENABLE_CHANNELS BIT(21) +#define I40EVF_FLAG_AQ_DISABLE_CHANNELS 
BIT(22) +#define I40EVF_FLAG_AQ_ADD_CLOUD_FILTER BIT(23) +#define I40EVF_FLAG_AQ_DEL_CLOUD_FILTER BIT(24) /* OS defined structs */ struct net_device *netdev; @@ -314,6 +374,13 @@ struct i40evf_adapter { u16 rss_lut_size; u8 *rss_key; u8 *rss_lut; + /* ADQ related members */ + struct i40evf_channel_config ch_config; + u8 num_tc; + struct list_head cloud_filter_list; + /* lock to protect access to the cloud filter list */ + spinlock_t cloud_filter_list_lock; + u16 num_cloud_filters; }; @@ -380,4 +447,8 @@ void i40evf_notify_client_message(struct i40e_vsi *vsi, u8 *msg, u16 len); void i40evf_notify_client_l2_params(struct i40e_vsi *vsi); void i40evf_notify_client_open(struct i40e_vsi *vsi); void i40evf_notify_client_close(struct i40e_vsi *vsi, bool reset); +void i40evf_enable_channels(struct i40evf_adapter *adapter); +void i40evf_disable_channels(struct i40evf_adapter *adapter); +void i40evf_add_cloud_filter(struct i40evf_adapter *adapter); +void i40evf_del_cloud_filter(struct i40evf_adapter *adapter); #endif /* _I40EVF_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e2d8aa19d205..e6793255de0b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev, rx_ring = &adapter->rx_rings[queue]; tx_ring = &adapter->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; 
return 0; } @@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev, /** * i40evf_set_itr_per_queue - set ITR values for specific queue - * @vsi: the VSI to set values for + * @adapter: the VF adapter struct to set values for * @ec: coalesce settings from ethtool * @queue: the queue to modify * @@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, { struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_hw *hw = &adapter->hw; struct i40e_q_vector *q_vector; - u16 vector; - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC; + rx_ring->itr_setting ^= I40E_ITR_DYNAMIC; - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC; + tx_ring->itr_setting ^= I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - i40e_flush(hw); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have 
entered + * into the q_vector, no need to write the values now. + */ } /** @@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) || + (ec->rx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) || + (ec->tx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } @@ -699,6 +695,12 @@ static int i40evf_set_channels(struct net_device *netdev, return -EINVAL; } + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n"); + return -EINVAL; + } + /* All of these should have already been checked by ethtool before this * even gets to us, but just to be sure. 
*/ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 16989ad2ca90..4955ce3ab6a2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) rx_ring->vsi = &adapter->vsi; q_vector->rx.ring = rx_ring; q_vector->rx.count++; - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector->ring_mask |= BIT(r_idx); - q_vector->itr_countdown = ITR_COUNTDOWN_START; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx), + q_vector->rx.current_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; } /** @@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) tx_ring->vsi = &adapter->vsi; q_vector->tx.ring = tx_ring; q_vector->tx.count++; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - q_vector->itr_countdown = ITR_COUNTDOWN_START; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); q_vector->num_ringpairs++; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx), + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; } /** @@ -783,7 +785,7 @@ static int i40evf_vlan_rx_kill_vid(struct net_device *netdev, **/ static struct i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, - u8 *macaddr) + const u8 *macaddr) { struct i40evf_mac_filter *f; @@ -806,7 +808,7 @@ i40evf_mac_filter *i40evf_find_filter(struct i40evf_adapter *adapter, **/ static struct 
i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, - u8 *macaddr) + const u8 *macaddr) { struct i40evf_mac_filter *f; @@ -878,50 +880,64 @@ static int i40evf_set_mac(struct net_device *netdev, void *p) } /** - * i40evf_set_rx_mode - NDO callback to set the netdev filters - * @netdev: network interface device structure - **/ -static void i40evf_set_rx_mode(struct net_device *netdev) + * i40evf_addr_sync - Callback for dev_(mc|uc)_sync to add address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be added. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. + */ +static int i40evf_addr_sync(struct net_device *netdev, const u8 *addr) { struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40evf_mac_filter *f, *ftmp; - struct netdev_hw_addr *uca; - struct netdev_hw_addr *mca; - struct netdev_hw_addr *ha; - - /* add addr if not already in the filter list */ - netdev_for_each_uc_addr(uca, netdev) { - i40evf_add_filter(adapter, uca->addr); - } - netdev_for_each_mc_addr(mca, netdev) { - i40evf_add_filter(adapter, mca->addr); - } - - spin_lock_bh(&adapter->mac_vlan_list_lock); - - list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { - netdev_for_each_mc_addr(mca, netdev) - if (ether_addr_equal(mca->addr, f->macaddr)) - goto bottom_of_search_loop; - netdev_for_each_uc_addr(uca, netdev) - if (ether_addr_equal(uca->addr, f->macaddr)) - goto bottom_of_search_loop; + if (i40evf_add_filter(adapter, addr)) + return 0; + else + return -ENOMEM; +} - for_each_dev_addr(netdev, ha) - if (ether_addr_equal(ha->addr, f->macaddr)) - goto bottom_of_search_loop; +/** + * i40evf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address + * @netdev: the netdevice + * @addr: address to add + * + * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call + * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock. 
+ */ +static int i40evf_addr_unsync(struct net_device *netdev, const u8 *addr) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40evf_mac_filter *f; - if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr)) - goto bottom_of_search_loop; + /* Under some circumstances, we might receive a request to delete + * our own device address from our uc list. Because we store the + * device address in the VSI's MAC/VLAN filter list, we need to ignore + * such requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; - /* f->macaddr wasn't found in uc, mc, or ha list so delete it */ + f = i40evf_find_filter(adapter, addr); + if (f) { f->remove = true; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; - -bottom_of_search_loop: - continue; } + return 0; +} + +/** + * i40evf_set_rx_mode - NDO callback to set the netdev filters + * @netdev: network interface device structure + **/ +static void i40evf_set_rx_mode(struct net_device *netdev) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + __dev_uc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync); + __dev_mc_sync(netdev, i40evf_addr_sync, i40evf_addr_unsync); + spin_unlock_bh(&adapter->mac_vlan_list_lock); if (netdev->flags & IFF_PROMISC && !(adapter->flags & I40EVF_FLAG_PROMISC_ON)) @@ -936,8 +952,6 @@ bottom_of_search_loop: else if (!(netdev->flags & IFF_ALLMULTI) && adapter->flags & I40EVF_FLAG_ALLMULTI_ON) adapter->aq_required |= I40EVF_FLAG_AQ_RELEASE_ALLMULTI; - - spin_unlock_bh(&adapter->mac_vlan_list_lock); } /** @@ -1025,7 +1039,9 @@ static void i40evf_up_complete(struct i40evf_adapter *adapter) void i40evf_down(struct i40evf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct i40evf_vlan_filter *vlf; struct i40evf_mac_filter *f; + struct i40evf_cloud_filter *cf; if (adapter->state <= __I40EVF_DOWN_PENDING) return; @@ -1038,17 +1054,29 @@ void 
i40evf_down(struct i40evf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); + /* clear the sync flag on all filters */ + __dev_uc_unsync(adapter->netdev, NULL); + __dev_mc_unsync(adapter->netdev, NULL); + /* remove all MAC filters */ list_for_each_entry(f, &adapter->mac_filter_list, list) { f->remove = true; } + /* remove all VLAN filters */ - list_for_each_entry(f, &adapter->vlan_filter_list, list) { - f->remove = true; + list_for_each_entry(vlf, &adapter->vlan_filter_list, list) { + vlf->remove = true; } spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* remove all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->del = true; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) && adapter->state != __I40EVF_RESETTING) { /* cancel any current operation */ @@ -1059,6 +1087,7 @@ void i40evf_down(struct i40evf_adapter *adapter) */ adapter->aq_required = I40EVF_FLAG_AQ_DEL_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_VLAN_FILTER; + adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES; } @@ -1144,6 +1173,9 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) */ if (adapter->num_req_queues) num_active_queues = adapter->num_req_queues; + else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) + num_active_queues = adapter->ch_config.total_qps; else num_active_queues = min_t(int, adapter->vsi_res->num_queue_pairs, @@ -1169,7 +1201,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; - tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; + tx_ring->itr_setting = I40E_ITR_TX_DEF; if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1178,7 +1210,7 @@ 
static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; + rx_ring->itr_setting = I40E_ITR_RX_DEF; } adapter->num_active_queues = num_active_queues; @@ -1471,6 +1503,16 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) goto err_alloc_q_vectors; } + /* If we've made it so far while ADq flag being ON, then we haven't + * bailed out anywhere in middle. And ADq isn't just enabled but actual + * resources have been allocated in the reset path. + * Now we can truly claim that ADq is enabled. + */ + if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) + dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created", + adapter->num_tc); + dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u", (adapter->num_active_queues > 1) ? "Enabled" : "Disabled", adapter->num_active_queues); @@ -1712,6 +1754,27 @@ static void i40evf_watchdog_task(struct work_struct *work) i40evf_set_promiscuous(adapter, 0); goto watchdog_done; } + + if (adapter->aq_required & I40EVF_FLAG_AQ_ENABLE_CHANNELS) { + i40evf_enable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_DISABLE_CHANNELS) { + i40evf_disable_channels(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_ADD_CLOUD_FILTER) { + i40evf_add_cloud_filter(adapter); + goto watchdog_done; + } + + if (adapter->aq_required & I40EVF_FLAG_AQ_DEL_CLOUD_FILTER) { + i40evf_del_cloud_filter(adapter); + goto watchdog_done; + } + schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); if (adapter->state == __I40EVF_RUNNING) @@ -1735,6 +1798,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) { struct i40evf_mac_filter *f, *ftmp; struct i40evf_vlan_filter *fv, *fvtmp; + struct i40evf_cloud_filter *cf, *cftmp; adapter->flags |= 
I40EVF_FLAG_PF_COMMS_FAILED; @@ -1756,7 +1820,7 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); - /* Delete all of the filters, both MAC and VLAN. */ + /* Delete all of the filters */ list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { list_del(&f->list); kfree(f); @@ -1769,6 +1833,14 @@ static void i40evf_disable_vf(struct i40evf_adapter *adapter) spin_unlock_bh(&adapter->mac_vlan_list_lock); + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + i40evf_free_misc_irq(adapter); i40evf_reset_interrupt_capability(adapter); i40evf_free_queues(adapter); @@ -1798,9 +1870,11 @@ static void i40evf_reset_task(struct work_struct *work) struct i40evf_adapter *adapter = container_of(work, struct i40evf_adapter, reset_task); + struct virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; struct i40e_hw *hw = &adapter->hw; struct i40evf_vlan_filter *vlf; + struct i40evf_cloud_filter *cf; struct i40evf_mac_filter *f; u32 reg_val; int i = 0, err; @@ -1893,6 +1967,7 @@ continue_reset: i40evf_free_all_rx_resources(adapter); i40evf_free_all_tx_resources(adapter); + adapter->flags |= I40EVF_FLAG_QUEUES_DISABLED; /* kill and reinit the admin queue */ i40evf_shutdown_adminq(hw); adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -1924,8 +1999,19 @@ continue_reset: spin_unlock_bh(&adapter->mac_vlan_list_lock); + /* check if TCs are running and re-add all cloud filters */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && + adapter->num_tc) { + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + cf->add = true; + } + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + adapter->aq_required |= 
I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; i40evf_misc_irq_enable(adapter); mod_timer(&adapter->watchdog_timer, jiffies + 2); @@ -2191,6 +2277,724 @@ void i40evf_free_all_rx_resources(struct i40evf_adapter *adapter) } /** + * i40evf_validate_tx_bandwidth - validate the max Tx bandwidth + * @adapter: board private structure + * @max_tx_rate: max Tx bw for a tc + **/ +static int i40evf_validate_tx_bandwidth(struct i40evf_adapter *adapter, + u64 max_tx_rate) +{ + int speed = 0, ret = 0; + + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + speed = 40000; + break; + case I40E_LINK_SPEED_25GB: + speed = 25000; + break; + case I40E_LINK_SPEED_20GB: + speed = 20000; + break; + case I40E_LINK_SPEED_10GB: + speed = 10000; + break; + case I40E_LINK_SPEED_1GB: + speed = 1000; + break; + case I40E_LINK_SPEED_100MB: + speed = 100; + break; + default: + break; + } + + if (max_tx_rate > speed) { + dev_err(&adapter->pdev->dev, + "Invalid tx rate specified\n"); + ret = -EINVAL; + } + + return ret; +} + +/** + * i40evf_validate_channel_config - validate queue mapping info + * @adapter: board private structure + * @mqprio_qopt: queue parameters + * + * This function validates if the config provided by the user to + * configure queue channels is valid or not. Returns 0 on a valid + * config. 
+ **/ +static int i40evf_validate_ch_config(struct i40evf_adapter *adapter, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 total_max_rate = 0; + int i, num_qps = 0; + u64 tx_rate = 0; + int ret = 0; + + if (mqprio_qopt->qopt.num_tc > I40EVF_MAX_TRAFFIC_CLASS || + mqprio_qopt->qopt.num_tc < 1) + return -EINVAL; + + for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) { + if (!mqprio_qopt->qopt.count[i] || + mqprio_qopt->qopt.offset[i] != num_qps) + return -EINVAL; + if (mqprio_qopt->min_rate[i]) { + dev_err(&adapter->pdev->dev, + "Invalid min tx rate (greater than 0) specified\n"); + return -EINVAL; + } + /*convert to Mbps */ + tx_rate = div_u64(mqprio_qopt->max_rate[i], + I40EVF_MBPS_DIVISOR); + total_max_rate += tx_rate; + num_qps += mqprio_qopt->qopt.count[i]; + } + if (num_qps > MAX_QUEUES) + return -EINVAL; + + ret = i40evf_validate_tx_bandwidth(adapter, total_max_rate); + return ret; +} + +/** + * i40evf_del_all_cloud_filters - delete all cloud filters + * on the traffic classes + **/ +static void i40evf_del_all_cloud_filters(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf, *cftmp; + + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); +} + +/** + * __i40evf_setup_tc - configure multiple traffic classes + * @netdev: network interface device structure + * @type_date: tc offload data + * + * This function processes the config information provided by the + * user to configure traffic classes/queue channels and packages the + * information to request the PF to setup traffic classes. + * + * Returns 0 on success. 
+ **/ +static int __i40evf_setup_tc(struct net_device *netdev, void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct virtchnl_vf_resource *vfres = adapter->vf_res; + u8 num_tc = 0, total_qps = 0; + int ret = 0, netdev_tc = 0; + u64 max_tx_rate; + u16 mode; + int i; + + num_tc = mqprio_qopt->qopt.num_tc; + mode = mqprio_qopt->mode; + + /* delete queue_channel */ + if (!mqprio_qopt->qopt.hw) { + if (adapter->ch_config.state == __I40EVF_TC_RUNNING) { + /* reset the tc configuration */ + netdev_reset_tc(netdev); + adapter->num_tc = 0; + netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + i40evf_del_all_cloud_filters(adapter); + adapter->aq_required = I40EVF_FLAG_AQ_DISABLE_CHANNELS; + goto exit; + } else { + return -EINVAL; + } + } + + /* add queue channel */ + if (mode == TC_MQPRIO_MODE_CHANNEL) { + if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) { + dev_err(&adapter->pdev->dev, "ADq not supported\n"); + return -EOPNOTSUPP; + } + if (adapter->ch_config.state != __I40EVF_TC_INVALID) { + dev_err(&adapter->pdev->dev, "TC configuration already exists\n"); + return -EINVAL; + } + + ret = i40evf_validate_ch_config(adapter, mqprio_qopt); + if (ret) + return ret; + /* Return if same TC config is requested */ + if (adapter->num_tc == num_tc) + return 0; + adapter->num_tc = num_tc; + + for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) { + if (i < num_tc) { + adapter->ch_config.ch_info[i].count = + mqprio_qopt->qopt.count[i]; + adapter->ch_config.ch_info[i].offset = + mqprio_qopt->qopt.offset[i]; + total_qps += mqprio_qopt->qopt.count[i]; + max_tx_rate = mqprio_qopt->max_rate[i]; + /* convert to Mbps */ + max_tx_rate = div_u64(max_tx_rate, + I40EVF_MBPS_DIVISOR); + adapter->ch_config.ch_info[i].max_tx_rate = + max_tx_rate; + } else { + adapter->ch_config.ch_info[i].count = 1; + adapter->ch_config.ch_info[i].offset = 0; + } + } + adapter->ch_config.total_qps = total_qps; + 
netif_tx_stop_all_queues(netdev); + netif_tx_disable(netdev); + adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_CHANNELS; + netdev_reset_tc(netdev); + /* Report the tc mapping up the stack */ + netdev_set_num_tc(adapter->netdev, num_tc); + for (i = 0; i < I40EVF_MAX_TRAFFIC_CLASS; i++) { + u16 qcount = mqprio_qopt->qopt.count[i]; + u16 qoffset = mqprio_qopt->qopt.offset[i]; + + if (i < num_tc) + netdev_set_tc_queue(netdev, netdev_tc++, qcount, + qoffset); + } + } +exit: + return ret; +} + +/** + * i40evf_parse_cls_flower - Parse tc flower filters provided by kernel + * @adapter: board private structure + * @cls_flower: pointer to struct tc_cls_flower_offload + * @filter: pointer to cloud filter structure + */ +static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *f, + struct i40evf_cloud_filter *filter) +{ + u16 n_proto_mask = 0; + u16 n_proto_key = 0; + u8 field_flags = 0; + u16 addr_type = 0; + u16 n_proto = 0; + int i = 0; + + if (f->dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) { + dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n", + f->dissector->used_keys); + return -EOPNOTSUPP; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + f->mask); + + if (mask->keyid != 0) + field_flags |= I40EVF_CLOUD_FIELD_TEN_ID; + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_dissector_key_basic *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_BASIC, + f->key); + + struct flow_dissector_key_basic *mask = + skb_flow_dissector_target(f->dissector, + 
FLOW_DISSECTOR_KEY_BASIC, + f->mask); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); + + if (n_proto_key == ETH_P_ALL) { + n_proto_key = 0; + n_proto_mask = 0; + } + n_proto = n_proto_key & n_proto_mask; + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) + return -EINVAL; + if (n_proto == ETH_P_IPV6) { + /* specify flow type as TCP IPv6 */ + filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW; + } + + if (key->ip_proto != IPPROTO_TCP) { + dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n"); + return -EINVAL; + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_dissector_key_eth_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->key); + + struct flow_dissector_key_eth_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_ETH_ADDRS, + f->mask); + /* use is_broadcast and is_zero to check for all 0xf or 0 */ + if (!is_zero_ether_addr(mask->dst)) { + if (is_broadcast_ether_addr(mask->dst)) { + field_flags |= I40EVF_CLOUD_FIELD_OMAC; + } else { + dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", + mask->dst); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(mask->src)) { + if (is_broadcast_ether_addr(mask->src)) { + field_flags |= I40EVF_CLOUD_FIELD_IMAC; + } else { + dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", + mask->src); + return I40E_ERR_CONFIG; + } + } + + if (!is_zero_ether_addr(key->dst)) + if (is_valid_ether_addr(key->dst) || + is_multicast_ether_addr(key->dst)) { + /* set the mask if a valid dst_mac address */ + for (i = 0; i < ETH_ALEN; i++) + filter->f.mask.tcp_spec.dst_mac[i] |= + 0xff; + ether_addr_copy(filter->f.data.tcp_spec.dst_mac, + key->dst); + } + + if (!is_zero_ether_addr(key->src)) + if (is_valid_ether_addr(key->src) || + is_multicast_ether_addr(key->src)) { + /* set the mask if a valid dst_mac address */ + for (i = 0; i < ETH_ALEN; i++) + filter->f.mask.tcp_spec.src_mac[i] 
|= + 0xff; + ether_addr_copy(filter->f.data.tcp_spec.src_mac, + key->src); + } + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + + if (mask->vlan_id) { + if (mask->vlan_id == VLAN_VID_MASK) { + field_flags |= I40EVF_CLOUD_FIELD_IVLAN; + } else { + dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", + mask->vlan_id); + return I40E_ERR_CONFIG; + } + } + filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); + filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); + } + + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_dissector_key_control *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_CONTROL, + f->key); + + addr_type = key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_dissector_key_ipv4_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->key); + struct flow_dissector_key_ipv4_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, + f->mask); + + if (mask->dst) { + if (mask->dst == cpu_to_be32(0xffffffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", + be32_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + if (mask->src) { + if (mask->src == cpu_to_be32(0xffffffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", + be32_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + + if (field_flags & I40EVF_CLOUD_FIELD_TEN_ID) { + dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); + return I40E_ERR_CONFIG; + } + if (key->dst) { + filter->f.mask.tcp_spec.dst_ip[0] |= + 
cpu_to_be32(0xffffffff); + filter->f.data.tcp_spec.dst_ip[0] = key->dst; + } + if (key->src) { + filter->f.mask.tcp_spec.src_ip[0] |= + cpu_to_be32(0xffffffff); + filter->f.data.tcp_spec.src_ip[0] = key->src; + } + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_dissector_key_ipv6_addrs *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->key); + struct flow_dissector_key_ipv6_addrs *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, + f->mask); + + /* validate mask, make sure it is not IPV6_ADDR_ANY */ + if (ipv6_addr_any(&mask->dst)) { + dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", + IPV6_ADDR_ANY); + return I40E_ERR_CONFIG; + } + + /* src and dest IPv6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1) which can be represented as ::1 + */ + if (ipv6_addr_loopback(&key->dst) || + ipv6_addr_loopback(&key->src)) { + dev_err(&adapter->pdev->dev, + "ipv6 addr should not be loopback\n"); + return I40E_ERR_CONFIG; + } + if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) + field_flags |= I40EVF_CLOUD_FIELD_IIP; + + if (key->dst.s6_addr) { + for (i = 0; i < 4; i++) + filter->f.mask.tcp_spec.dst_ip[i] |= + cpu_to_be32(0xffffffff); + memcpy(&filter->f.data.tcp_spec.dst_ip, + &key->dst.s6_addr32, + sizeof(filter->f.data.tcp_spec.dst_ip)); + } + if (key->src.s6_addr) { + for (i = 0; i < 4; i++) + filter->f.mask.tcp_spec.src_ip[i] |= + cpu_to_be32(0xffffffff); + memcpy(&filter->f.data.tcp_spec.src_ip, + &key->src.s6_addr32, + sizeof(filter->f.data.tcp_spec.src_ip)); + } + } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_dissector_key_ports *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->key); + struct flow_dissector_key_ports *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_PORTS, + f->mask); + + if (mask->src) { + if (mask->src == cpu_to_be16(0xffff)) { + field_flags |= 
I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", + be16_to_cpu(mask->src)); + return I40E_ERR_CONFIG; + } + } + + if (mask->dst) { + if (mask->dst == cpu_to_be16(0xffff)) { + field_flags |= I40EVF_CLOUD_FIELD_IIP; + } else { + dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", + be16_to_cpu(mask->dst)); + return I40E_ERR_CONFIG; + } + } + if (key->dst) { + filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); + filter->f.data.tcp_spec.dst_port = key->dst; + } + + if (key->src) { + filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff); + filter->f.data.tcp_spec.src_port = key->dst; + } + } + filter->f.field_flags = field_flags; + + return 0; +} + +/** + * i40evf_handle_tclass - Forward to a traffic class on the device + * @adapter: board private structure + * @tc: traffic class index on the device + * @filter: pointer to cloud filter structure + */ +static int i40evf_handle_tclass(struct i40evf_adapter *adapter, u32 tc, + struct i40evf_cloud_filter *filter) +{ + if (tc == 0) + return 0; + if (tc < adapter->num_tc) { + if (!filter->f.data.tcp_spec.dst_port) { + dev_err(&adapter->pdev->dev, + "Specify destination port to redirect to traffic class other than TC0\n"); + return -EINVAL; + } + } + /* redirect to a traffic class on the same device */ + filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT; + filter->f.action_meta = tc; + return 0; +} + +/** + * i40evf_configure_clsflower - Add tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int i40evf_configure_clsflower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); + struct i40evf_cloud_filter *filter = NULL; + int err = 0, count = 50; + + while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, + &adapter->crit_section)) { + udelay(1); + if (--count == 0) + return -EINVAL; + } + + if (tc < 0) { + 
dev_err(&adapter->pdev->dev, "Invalid traffic class\n"); + return -EINVAL; + } + + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) { + err = -ENOMEM; + goto clearout; + } + filter->cookie = cls_flower->cookie; + + /* set the mask to all zeroes to begin with */ + memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec)); + /* start out with flow type and eth type IPv4 to begin with */ + filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; + err = i40evf_parse_cls_flower(adapter, cls_flower, filter); + if (err < 0) + goto err; + + err = i40evf_handle_tclass(adapter, tc, filter); + if (err < 0) + goto err; + + /* add filter to the list */ + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_add_tail(&filter->list, &adapter->cloud_filter_list); + adapter->num_cloud_filters++; + filter->add = true; + adapter->aq_required |= I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; + spin_unlock_bh(&adapter->cloud_filter_list_lock); +err: + if (err) + kfree(filter); +clearout: + clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); + return err; +} + +/* i40evf_find_cf - Find the cloud filter in the list + * @adapter: Board private structure + * @cookie: filter specific cookie + * + * Returns ptr to the filter object or NULL. Must be called while holding the + * cloud_filter_list_lock. 
+ */ +static struct i40evf_cloud_filter *i40evf_find_cf(struct i40evf_adapter *adapter, + unsigned long *cookie) +{ + struct i40evf_cloud_filter *filter = NULL; + + if (!cookie) + return NULL; + + list_for_each_entry(filter, &adapter->cloud_filter_list, list) { + if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie))) + return filter; + } + return NULL; +} + +/** + * i40evf_delete_clsflower - Remove tc flower filters + * @adapter: board private structure + * @cls_flower: Pointer to struct tc_cls_flower_offload + */ +static int i40evf_delete_clsflower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + struct i40evf_cloud_filter *filter = NULL; + int err = 0; + + spin_lock_bh(&adapter->cloud_filter_list_lock); + filter = i40evf_find_cf(adapter, &cls_flower->cookie); + if (filter) { + filter->del = true; + adapter->aq_required |= I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; + } else { + err = -EINVAL; + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + + return err; +} + +/** + * i40evf_setup_tc_cls_flower - flower classifier offloads + * @netdev: net device to configure + * @type_data: offload data + */ +static int i40evf_setup_tc_cls_flower(struct i40evf_adapter *adapter, + struct tc_cls_flower_offload *cls_flower) +{ + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return i40evf_configure_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_DESTROY: + return i40evf_delete_clsflower(adapter, cls_flower); + case TC_CLSFLOWER_STATS: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +/** + * i40evf_setup_tc_block_cb - block callback for tc + * @type: type of offload + * @type_data: offload data + * @cb_priv: + * + * This function is the block callback for traffic classes + **/ +static int i40evf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + switch (type) { + case TC_SETUP_CLSFLOWER: + return 
i40evf_setup_tc_cls_flower(cb_priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** + * i40evf_setup_tc_block - register callbacks for tc + * @netdev: network interface device structure + * @f: tc offload data + * + * This function registers block callbacks for tc + * offloads + **/ +static int i40evf_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + struct i40evf_adapter *adapter = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, i40evf_setup_tc_block_cb, + adapter, adapter); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, i40evf_setup_tc_block_cb, + adapter); + return 0; + default: + return -EOPNOTSUPP; + } +} + +/** + * i40evf_setup_tc - configure multiple traffic classes + * @netdev: network interface device structure + * @type: type of offload + * @type_date: tc offload data + * + * This function is the callback to ndo_setup_tc in the + * netdev_ops. 
+ * + * Returns 0 on success + **/ +static int i40evf_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_QDISC_MQPRIO: + return __i40evf_setup_tc(netdev, type_data); + case TC_SETUP_BLOCK: + return i40evf_setup_tc_block(netdev, type_data); + default: + return -EOPNOTSUPP; + } +} + +/** * i40evf_open - Called when a network interface is made active * @netdev: network interface device structure * @@ -2457,6 +3261,7 @@ static const struct net_device_ops i40evf_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40evf_netpoll, #endif + .ndo_setup_tc = i40evf_setup_tc, }; /** @@ -2571,6 +3376,9 @@ int i40evf_process_config(struct i40evf_adapter *adapter) if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) hw_features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); + /* Enable cloud filter if ADQ is supported */ + if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) + hw_features |= NETIF_F_HW_TC; netdev->hw_features |= hw_features; @@ -2938,9 +3746,11 @@ static int i40evf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mutex_init(&hw->aq.arq_mutex); spin_lock_init(&adapter->mac_vlan_list_lock); + spin_lock_init(&adapter->cloud_filter_list_lock); INIT_LIST_HEAD(&adapter->mac_filter_list); INIT_LIST_HEAD(&adapter->vlan_filter_list); + INIT_LIST_HEAD(&adapter->cloud_filter_list); INIT_WORK(&adapter->reset_task, i40evf_reset_task); INIT_WORK(&adapter->adminq_task, i40evf_adminq_task); @@ -3065,7 +3875,9 @@ static void i40evf_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40evf_vlan_filter *vlf, *vlftmp; struct i40evf_mac_filter *f, *ftmp; + struct i40evf_cloud_filter *cf, *cftmp; struct i40e_hw *hw = &adapter->hw; int err; /* Indicate we are in remove and not to run reset_task */ @@ -3087,6 +3899,7 @@ static void i40evf_remove(struct pci_dev *pdev) /* Shut down all the 
garbage mashers on the detention level */ adapter->state = __I40EVF_REMOVE; adapter->aq_required = 0; + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; i40evf_request_reset(adapter); msleep(50); /* If the FW isn't responding, kick it once, but only once. */ @@ -3127,13 +3940,21 @@ static void i40evf_remove(struct pci_dev *pdev) list_del(&f->list); kfree(f); } - list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) { - list_del(&f->list); - kfree(f); + list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list, + list) { + list_del(&vlf->list); + kfree(vlf); } spin_unlock_bh(&adapter->mac_vlan_list_lock); + spin_lock_bh(&adapter->cloud_filter_list_lock); + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + list_del(&cf->list); + kfree(cf); + } + spin_unlock_bh(&adapter->cloud_filter_list_lock); + free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 50ce0d6c09ef..6134b61e0938 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -161,7 +161,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter) VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 | VIRTCHNL_VF_OFFLOAD_ENCAP | VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM | - VIRTCHNL_VF_OFFLOAD_REQ_QUEUES; + VIRTCHNL_VF_OFFLOAD_REQ_QUEUES | + VIRTCHNL_VF_OFFLOAD_ADQ; adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES; adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG; @@ -344,6 +345,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter) void i40evf_map_queues(struct i40evf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; + struct virtchnl_vector_map *vecmap; int v_idx, q_vectors, len; struct i40e_q_vector *q_vector; @@ -367,17 +369,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 
0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vectors + v_idx; - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; - vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; - vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask; + q_vector = &adapter->q_vectors[v_idx]; + vecmap = &vimi->vecmap[v_idx]; + + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = v_idx + NONQ_VECS; + vecmap->txq_map = q_vector->ring_mask; + vecmap->rxq_map = q_vector->ring_mask; + vecmap->rxitr_idx = I40E_RX_ITR; + vecmap->txitr_idx = I40E_TX_ITR; } /* Misc vector last - this is only for AdminQ messages */ - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = 0; - vimi->vecmap[v_idx].txq_map = 0; - vimi->vecmap[v_idx].rxq_map = 0; + vecmap = &vimi->vecmap[v_idx]; + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = 0; + vecmap->txq_map = 0; + vecmap->rxq_map = 0; adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS; i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP, @@ -459,7 +466,7 @@ void i40evf_add_ether_addrs(struct i40evf_adapter *adapter) more = true; } - veal = kzalloc(len, GFP_KERNEL); + veal = kzalloc(len, GFP_ATOMIC); if (!veal) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -532,7 +539,7 @@ void i40evf_del_ether_addrs(struct i40evf_adapter *adapter) (count * sizeof(struct virtchnl_ether_addr)); more = true; } - veal = kzalloc(len, GFP_KERNEL); + veal = kzalloc(len, GFP_ATOMIC); if (!veal) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -606,7 +613,7 @@ void i40evf_add_vlans(struct i40evf_adapter *adapter) (count * sizeof(u16)); more = true; } - vvfl = kzalloc(len, GFP_KERNEL); + vvfl = kzalloc(len, GFP_ATOMIC); if (!vvfl) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -678,7 +685,7 @@ void i40evf_del_vlans(struct i40evf_adapter *adapter) (count * sizeof(u16)); more = true; } - vvfl = kzalloc(len, GFP_KERNEL); + vvfl 
= kzalloc(len, GFP_ATOMIC); if (!vvfl) { spin_unlock_bh(&adapter->mac_vlan_list_lock); return; @@ -967,6 +974,201 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter) } /** + * i40evf_enable_channel + * @adapter: adapter structure + * + * Request that the PF enable channels as specified by + * the user via tc tool. + **/ +void i40evf_enable_channels(struct i40evf_adapter *adapter) +{ + struct virtchnl_tc_info *vti = NULL; + u16 len; + int i; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n", + adapter->current_op); + return; + } + + len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) + + sizeof(struct virtchnl_tc_info); + + vti = kzalloc(len, GFP_KERNEL); + if (!vti) + return; + vti->num_tc = adapter->num_tc; + for (i = 0; i < vti->num_tc; i++) { + vti->list[i].count = adapter->ch_config.ch_info[i].count; + vti->list[i].offset = adapter->ch_config.ch_info[i].offset; + vti->list[i].pad = 0; + vti->list[i].max_tx_rate = + adapter->ch_config.ch_info[i].max_tx_rate; + } + + adapter->ch_config.state = __I40EVF_TC_RUNNING; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_ENABLE_CHANNELS; + i40evf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, + (u8 *)vti, len); + kfree(vti); +} + +/** + * i40evf_disable_channel + * @adapter: adapter structure + * + * Request that the PF disable channels that are configured + **/ +void i40evf_disable_channels(struct i40evf_adapter *adapter) +{ + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n", + adapter->current_op); + return; + } + + adapter->ch_config.state = __I40EVF_TC_INVALID; + adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED; + 
adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS; + adapter->aq_required &= ~I40EVF_FLAG_AQ_DISABLE_CHANNELS; + i40evf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, + NULL, 0); +} + +/** + * i40evf_print_cloud_filter + * @adapter: adapter structure + * @f: cloud filter to print + * + * Print the cloud filter + **/ +static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter, + struct virtchnl_filter f) +{ + switch (f.flow_type) { + case VIRTCHNL_TCP_V4_FLOW: + dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n", + &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac, + ntohs(f.data.tcp_spec.vlan_id), + &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0], + ntohs(f.data.tcp_spec.dst_port), + ntohs(f.data.tcp_spec.src_port)); + break; + case VIRTCHNL_TCP_V6_FLOW: + dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n", + &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac, + ntohs(f.data.tcp_spec.vlan_id), + &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip, + ntohs(f.data.tcp_spec.dst_port), + ntohs(f.data.tcp_spec.src_port)); + break; + } +} + +/** + * i40evf_add_cloud_filter + * @adapter: adapter structure + * + * Request that the PF add cloud filters as specified + * by the user via tc tool. 
+ **/ +void i40evf_add_cloud_filter(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~I40EVF_FLAG_AQ_ADD_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->add) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->add = false; + cf->state = __I40EVF_CF_ADD_PENDING; + i40evf_send_pf_msg(adapter, + VIRTCHNL_OP_ADD_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** + * i40evf_del_cloud_filter + * @adapter: adapter structure + * + * Request that the PF delete cloud filters as specified + * by the user via tc tool. 
+ **/ +void i40evf_del_cloud_filter(struct i40evf_adapter *adapter) +{ + struct i40evf_cloud_filter *cf, *cftmp; + struct virtchnl_filter *f; + int len = 0, count = 0; + + if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { + /* bail because we already have a command pending */ + dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n", + adapter->current_op); + return; + } + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->del) { + count++; + break; + } + } + if (!count) { + adapter->aq_required &= ~I40EVF_FLAG_AQ_DEL_CLOUD_FILTER; + return; + } + adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER; + + len = sizeof(struct virtchnl_filter); + f = kzalloc(len, GFP_KERNEL); + if (!f) + return; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) { + if (cf->del) { + memcpy(f, &cf->f, sizeof(struct virtchnl_filter)); + cf->del = false; + cf->state = __I40EVF_CF_DEL_PENDING; + i40evf_send_pf_msg(adapter, + VIRTCHNL_OP_DEL_CLOUD_FILTER, + (u8 *)f, len); + } + } + kfree(f); +} + +/** * i40evf_request_reset * @adapter: adapter structure * @@ -1011,14 +1213,25 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, if (adapter->link_up == link_up) break; - /* If we get link up message and start queues before - * our queues are configured it will trigger a TX hang. - * In that case, just ignore the link status message, - * we'll get another one after we enable queues and - * actually prepared to send traffic. - */ - if (link_up && adapter->state != __I40EVF_RUNNING) - break; + if (link_up) { + /* If we get link up message and start queues + * before our queues are configured it will + * trigger a TX hang. In that case, just ignore + * the link status message,we'll get another one + * after we enable queues and actually prepared + * to send traffic. + */ + if (adapter->state != __I40EVF_RUNNING) + break; + + /* For ADq enabled VF, we reconfigure VSIs and + * re-allocate queues. 
Hence wait till all + * queues are enabled. + */ + if (adapter->flags & + I40EVF_FLAG_QUEUES_DISABLED) + break; + } adapter->link_up = link_up; if (link_up) { @@ -1031,7 +1244,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, i40evf_print_link_message(adapter); break; case VIRTCHNL_EVENT_RESET_IMPENDING: - dev_info(&adapter->pdev->dev, "PF reset warning received\n"); + dev_info(&adapter->pdev->dev, "Reset warning received from the PF\n"); if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) { adapter->flags |= I40EVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); @@ -1063,6 +1276,57 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n", i40evf_stat_str(&adapter->hw, v_retval)); break; + case VIRTCHNL_OP_ENABLE_CHANNELS: + dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->ch_config.state = __I40EVF_TC_INVALID; + netdev_reset_tc(netdev); + netif_tx_start_all_queues(netdev); + break; + case VIRTCHNL_OP_DISABLE_CHANNELS: + dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n", + i40evf_stat_str(&adapter->hw, v_retval)); + adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED; + adapter->ch_config.state = __I40EVF_TC_RUNNING; + netif_tx_start_all_queues(netdev); + break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, + &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_ADD_PENDING) { + cf->state = __I40EVF_CF_INVALID; + dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n", + i40evf_stat_str(&adapter->hw, + v_retval)); + i40evf_print_cloud_filter(adapter, + cf->f); + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; + case 
VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_DEL_PENDING) { + cf->state = __I40EVF_CF_ACTIVE; + dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n", + i40evf_stat_str(&adapter->hw, + v_retval)); + i40evf_print_cloud_filter(adapter, + cf->f); + } + } + } + break; default: dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n", v_retval, @@ -1102,6 +1366,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); + adapter->flags &= ~I40EVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); @@ -1156,6 +1421,29 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, } } break; + case VIRTCHNL_OP_ADD_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf; + + list_for_each_entry(cf, &adapter->cloud_filter_list, list) { + if (cf->state == __I40EVF_CF_ADD_PENDING) + cf->state = __I40EVF_CF_ACTIVE; + } + } + break; + case VIRTCHNL_OP_DEL_CLOUD_FILTER: { + struct i40evf_cloud_filter *cf, *cftmp; + + list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, + list) { + if (cf->state == __I40EVF_CF_DEL_PENDING) { + cf->state = __I40EVF_CF_INVALID; + list_del(&cf->list); + kfree(cf); + adapter->num_cloud_filters--; + } + } + } + break; default: if (adapter->current_op && (v_opcode != adapter->current_op)) dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n", diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9463c3fa254f..0cadcabfe86f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -20,7 +20,7 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_cnt.o spectrum_fid.o \ spectrum_ipip.o 
spectrum_acl_flex_actions.o \ spectrum_mr.o spectrum_mr_tcam.o \ - spectrum_qdisc.o + spectrum_qdisc.o spectrum_span.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3dcc58d61506..5e8ea712caa2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -71,6 +71,7 @@ #include "spectrum_cnt.h" #include "spectrum_dpipe.h" #include "spectrum_acl_flex_actions.h" +#include "spectrum_span.h" #include "../mlxfw/mlxfw.h" #define MLXSW_FWREV_MAJOR 13 @@ -487,327 +488,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp) return 0; } -static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) -{ - int i; - - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) - return -EIO; - - mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, - MAX_SPAN); - mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, - sizeof(struct mlxsw_sp_span_entry), - GFP_KERNEL); - if (!mlxsw_sp->span.entries) - return -ENOMEM; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) - INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); - - return 0; -} - -static void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) -{ - int i; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - - WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); - } - kfree(mlxsw_sp->span.entries); -} - -static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - struct mlxsw_sp_span_entry *span_entry; - char mpat_pl[MLXSW_REG_MPAT_LEN]; - u8 local_port = port->local_port; - int index; - int i; - int err; - - /* find a free entry to use */ - index = -1; - for (i = 
0; i < mlxsw_sp->span.entries_count; i++) { - if (!mlxsw_sp->span.entries[i].used) { - index = i; - span_entry = &mlxsw_sp->span.entries[i]; - break; - } - } - if (index < 0) - return NULL; - - /* create a new port analayzer entry for local_port */ - mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); - if (err) - return NULL; - - span_entry->used = true; - span_entry->id = index; - span_entry->ref_count = 1; - span_entry->local_port = local_port; - return span_entry; -} - -static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_span_entry *span_entry) -{ - u8 local_port = span_entry->local_port; - char mpat_pl[MLXSW_REG_MPAT_LEN]; - int pa_id = span_entry->id; - - mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); - span_entry->used = false; -} - -struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) -{ - int i; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - - if (curr->used && curr->local_port == local_port) - return curr; - } - return NULL; -} - -static struct mlxsw_sp_span_entry -*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) -{ - struct mlxsw_sp_span_entry *span_entry; - - span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, - port->local_port); - if (span_entry) { - /* Already exists, just take a reference */ - span_entry->ref_count++; - return span_entry; - } - - return mlxsw_sp_span_entry_create(port); -} - -static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_span_entry *span_entry) -{ - WARN_ON(!span_entry->ref_count); - if (--span_entry->ref_count == 0) - mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); - return 0; -} - -static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; 
- struct mlxsw_sp_span_inspected_port *p; - int i; - - for (i = 0; i < mlxsw_sp->span.entries_count; i++) { - struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - - list_for_each_entry(p, &curr->bound_ports_list, list) - if (p->local_port == port->local_port && - p->type == MLXSW_SP_SPAN_EGRESS) - return true; - } - - return false; -} - -static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, - int mtu) -{ - return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; -} - -static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - int err; - - /* If port is egress mirrored, the shared buffer size should be - * updated according to the mtu value - */ - if (mlxsw_sp_span_is_egress_mirror(port)) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); - - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); - return err; - } - } - - return 0; -} - -static struct mlxsw_sp_span_inspected_port * -mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry) -{ - struct mlxsw_sp_span_inspected_port *p; - - list_for_each_entry(p, &span_entry->bound_ports_list, list) - if (port->local_port == p->local_port) - return p; - return NULL; -} - -static int -mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type, - bool bind) -{ - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char mpar_pl[MLXSW_REG_MPAR_LEN]; - int pa_id = span_entry->id; - - /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, - (enum mlxsw_reg_mpar_i_e) type, bind, pa_id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); -} - -static int 
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type, - bool bind) -{ - struct mlxsw_sp_span_inspected_port *inspected_port; - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - int err; - - /* if it is an egress SPAN, bind a shared buffer to it */ - if (type == MLXSW_SP_SPAN_EGRESS) { - u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, - port->dev->mtu); - - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - if (err) { - netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); - return err; - } - } - - if (bind) { - err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type, - true); - if (err) - goto err_port_bind; - } - - inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); - if (!inspected_port) { - err = -ENOMEM; - goto err_inspected_port_alloc; - } - inspected_port->local_port = port->local_port; - inspected_port->type = type; - list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); - - return 0; - -err_inspected_port_alloc: - if (bind) - mlxsw_sp_span_inspected_port_bind(port, span_entry, type, - false); -err_port_bind: - if (type == MLXSW_SP_SPAN_EGRESS) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - } - return err; -} - -static void -mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, - struct mlxsw_sp_span_entry *span_entry, - enum mlxsw_sp_span_type type, - bool bind) -{ - struct mlxsw_sp_span_inspected_port *inspected_port; - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - char sbib_pl[MLXSW_REG_SBIB_LEN]; - - inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); - if (!inspected_port) - return; - - if (bind) - mlxsw_sp_span_inspected_port_bind(port, span_entry, type, - false); - /* remove the SBIB buffer if it was egress 
SPAN */ - if (type == MLXSW_SP_SPAN_EGRESS) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); - } - - mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); - - list_del(&inspected_port->list); - kfree(inspected_port); -} - -int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type, bool bind) -{ - struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; - struct mlxsw_sp_span_entry *span_entry; - int err; - - span_entry = mlxsw_sp_span_entry_get(to); - if (!span_entry) - return -ENOENT; - - netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", - span_entry->id); - - err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind); - if (err) - goto err_port_bind; - - return 0; - -err_port_bind: - mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); - return err; -} - -void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, - enum mlxsw_sp_span_type type, bool bind) -{ - struct mlxsw_sp_span_entry *span_entry; - - span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, - destination_port); - if (!span_entry) { - netdev_err(from->dev, "no span entry found\n"); - return; - } - - netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", - span_entry->id); - mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); -} - static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable, u32 rate) { @@ -4556,13 +4236,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, u16 lag_id; if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) { - NL_SET_ERR_MSG(extack, - "spectrum: Exceeded number of supported LAG devices"); + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported LAG devices"); return false; } if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) { - NL_SET_ERR_MSG(extack, - "spectrum: LAG device using unsupported Tx type"); + NL_SET_ERR_MSG_MOD(extack, "LAG device using 
unsupported Tx type"); return false; } return true; @@ -4804,8 +4482,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && !netif_is_ovs_master(upper_dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Unknown upper device type"); + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EINVAL; } if (!info->linking) @@ -4814,8 +4491,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev))) { - NL_SET_ERR_MSG(extack, - "spectrum: Enslaving a port to a device that already has an upper device is not supported"); + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } if (netif_is_lag_master(upper_dev) && @@ -4823,24 +4499,20 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, info->upper_info, extack)) return -EINVAL; if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Master device is a LAG master and this device has a VLAN"); + NL_SET_ERR_MSG_MOD(extack, "Master device is a LAG master and this device has a VLAN"); return -EINVAL; } if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) { - NL_SET_ERR_MSG(extack, - "spectrum: Can not put a VLAN on a LAG port"); + NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on a LAG port"); return -EINVAL; } if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Master device is an OVS master and this device has a VLAN"); + NL_SET_ERR_MSG_MOD(extack, "Master device is an OVS master and this device has a VLAN"); return -EINVAL; } if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) { - NL_SET_ERR_MSG(extack, - "spectrum: Can not put a VLAN on an OVS port"); + 
NL_SET_ERR_MSG_MOD(extack, "Can not put a VLAN on an OVS port"); return -EINVAL; } break; @@ -4953,7 +4625,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; if (!netif_is_bridge_master(upper_dev)) { - NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers"); + NL_SET_ERR_MSG_MOD(extack, "VLAN devices only support bridge and VRF uppers"); return -EINVAL; } if (!info->linking) @@ -4962,7 +4634,7 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev))) { - NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported"); + NL_SET_ERR_MSG_MOD(extack, "Enslaving a port to a device that already has an upper device is not supported"); return -EINVAL; } break; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bdd8f94a452c..6718a1f0482c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -80,6 +80,7 @@ enum mlxsw_sp_resource_id { struct mlxsw_sp_port; struct mlxsw_sp_rif; +struct mlxsw_sp_span_entry; struct mlxsw_sp_upper { struct net_device *dev; @@ -111,25 +112,6 @@ struct mlxsw_sp_mid { unsigned long *ports_in_mid; /* bits array */ }; -enum mlxsw_sp_span_type { - MLXSW_SP_SPAN_EGRESS, - MLXSW_SP_SPAN_INGRESS -}; - -struct mlxsw_sp_span_inspected_port { - struct list_head list; - enum mlxsw_sp_span_type type; - u8 local_port; -}; - -struct mlxsw_sp_span_entry { - u8 local_port; - bool used; - struct list_head bound_ports_list; - int ref_count; - int id; -}; - enum mlxsw_sp_port_mall_action_type { MLXSW_SP_PORT_MALL_MIRROR, MLXSW_SP_PORT_MALL_SAMPLE, @@ -396,16 +378,6 @@ struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); struct mlxsw_sp_port 
*mlxsw_sp_port_lower_dev_hold(struct net_device *dev); void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev); -int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type, - bool bind); -void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, - u8 destination_port, - enum mlxsw_sp_span_type type, - bool bind); -struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port); /* spectrum_dcb.c */ #ifdef CONFIG_MLXSW_SPECTRUM_DCB diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c index 6ca6894125f0..f7e61cecc42b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -35,6 +35,7 @@ #include "spectrum_acl_flex_actions.h" #include "core_acl_flex_actions.h" +#include "spectrum_span.h" #define MLXSW_SP_KVDL_ACT_EXT_SIZE 1 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 7502e53447bd..a1c4b1e63f8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -37,122 +37,89 @@ #include "spectrum_ipip.h" struct ip_tunnel_parm -mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev) +mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) { struct ip_tunnel *tun = netdev_priv(ol_dev); return tun->parms; } -static bool mlxsw_sp_ipip_parms_has_ikey(struct ip_tunnel_parm parms) +static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms) { return !!(parms.i_flags & TUNNEL_KEY); } -static bool mlxsw_sp_ipip_parms_has_okey(struct ip_tunnel_parm parms) +static bool mlxsw_sp_ipip_parms4_has_okey(struct ip_tunnel_parm parms) { return !!(parms.o_flags & TUNNEL_KEY); 
} -static u32 mlxsw_sp_ipip_parms_ikey(struct ip_tunnel_parm parms) +static u32 mlxsw_sp_ipip_parms4_ikey(struct ip_tunnel_parm parms) { - return mlxsw_sp_ipip_parms_has_ikey(parms) ? + return mlxsw_sp_ipip_parms4_has_ikey(parms) ? be32_to_cpu(parms.i_key) : 0; } -static u32 mlxsw_sp_ipip_parms_okey(struct ip_tunnel_parm parms) +static u32 mlxsw_sp_ipip_parms4_okey(struct ip_tunnel_parm parms) { - return mlxsw_sp_ipip_parms_has_okey(parms) ? + return mlxsw_sp_ipip_parms4_has_okey(parms) ? be32_to_cpu(parms.o_key) : 0; } -static __be32 mlxsw_sp_ipip_parms_saddr4(struct ip_tunnel_parm parms) +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms) { - return parms.iph.saddr; + return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.saddr }; } static union mlxsw_sp_l3addr -mlxsw_sp_ipip_parms_saddr(enum mlxsw_sp_l3proto proto, - struct ip_tunnel_parm parms) +mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms) { - switch (proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_parms_saddr4(parms), - }; - case MLXSW_SP_L3_PROTO_IPV6: - break; - } - - WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; + return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr }; } -static __be32 mlxsw_sp_ipip_parms_daddr4(struct ip_tunnel_parm parms) +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) { - return parms.iph.daddr; -} + struct ip_tunnel_parm parms4; -static union mlxsw_sp_l3addr -mlxsw_sp_ipip_parms_daddr(enum mlxsw_sp_l3proto proto, - struct ip_tunnel_parm parms) -{ switch (proto) { case MLXSW_SP_L3_PROTO_IPV4: - return (union mlxsw_sp_l3addr) { - .addr4 = mlxsw_sp_ipip_parms_daddr4(parms), - }; + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + return mlxsw_sp_ipip_parms4_saddr(parms4); case MLXSW_SP_L3_PROTO_IPV6: break; } WARN_ON(1); - return (union mlxsw_sp_l3addr) { - .addr4 = 0, - }; -} - -static bool 
mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_has_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); -} - -static bool mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_has_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); + return (union mlxsw_sp_l3addr) {0}; } -static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) { - return mlxsw_sp_ipip_parms_ikey(mlxsw_sp_ipip_netdev_parms(ol_dev)); -} -static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_okey(mlxsw_sp_ipip_netdev_parms(ol_dev)); -} + struct ip_tunnel_parm parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); -union mlxsw_sp_l3addr -mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, - const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_saddr(proto, - mlxsw_sp_ipip_netdev_parms(ol_dev)); -} - -static __be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) -{ - return mlxsw_sp_ipip_parms_daddr4(mlxsw_sp_ipip_netdev_parms(ol_dev)); + return mlxsw_sp_ipip_parms4_daddr(parms4).addr4; } static union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { - return mlxsw_sp_ipip_parms_daddr(proto, - mlxsw_sp_ipip_netdev_parms(ol_dev)); + struct ip_tunnel_parm parms4; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + return mlxsw_sp_ipip_parms4_daddr(parms4); + case MLXSW_SP_L3_PROTO_IPV6: + break; + } + + WARN_ON(1); + return (union mlxsw_sp_l3addr) {0}; } static int @@ -176,12 +143,17 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, u32 tunnel_index, struct mlxsw_sp_ipip_entry *ipip_entry) { - bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev); u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); - u32 ikey = 
mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev); char rtdp_pl[MLXSW_REG_RTDP_LEN]; + struct ip_tunnel_parm parms; unsigned int type_check; + bool has_ikey; u32 daddr4; + u32 ikey; + + parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev); + has_ikey = mlxsw_sp_ipip_parms4_has_ikey(parms); + ikey = mlxsw_sp_ipip_parms4_ikey(parms); mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); @@ -273,14 +245,15 @@ static struct mlxsw_sp_rif_ipip_lb_config mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev) { + struct ip_tunnel_parm parms = mlxsw_sp_ipip_netdev_parms4(ol_dev); enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; - lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + lb_ipipt = mlxsw_sp_ipip_parms4_has_okey(parms) ? MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; return (struct mlxsw_sp_rif_ipip_lb_config){ .lb_ipipt = lb_ipipt, - .okey = mlxsw_sp_ipip_netdev_okey(ol_dev), + .okey = mlxsw_sp_ipip_parms4_okey(parms), .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, ol_dev), @@ -300,16 +273,12 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, bool update_nhs = false; int err = 0; - new_parms = mlxsw_sp_ipip_netdev_parms(ipip_entry->ol_dev); + new_parms = mlxsw_sp_ipip_netdev_parms4(ipip_entry->ol_dev); - new_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, - new_parms); - old_saddr = mlxsw_sp_ipip_parms_saddr(MLXSW_SP_L3_PROTO_IPV4, - ipip_entry->parms); - new_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, - new_parms); - old_daddr = mlxsw_sp_ipip_parms_daddr(MLXSW_SP_L3_PROTO_IPV4, - ipip_entry->parms); + new_saddr = mlxsw_sp_ipip_parms4_saddr(new_parms); + old_saddr = mlxsw_sp_ipip_parms4_saddr(ipip_entry->parms4); + new_daddr = mlxsw_sp_ipip_parms4_daddr(new_parms); + old_daddr = mlxsw_sp_ipip_parms4_daddr(ipip_entry->parms4); if 
(!mlxsw_sp_l3addr_eq(&new_saddr, &old_saddr)) { u16 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); @@ -326,14 +295,14 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, } update_tunnel = true; - } else if ((mlxsw_sp_ipip_parms_okey(ipip_entry->parms) != - mlxsw_sp_ipip_parms_okey(new_parms)) || - ipip_entry->parms.link != new_parms.link) { + } else if ((mlxsw_sp_ipip_parms4_okey(ipip_entry->parms4) != + mlxsw_sp_ipip_parms4_okey(new_parms)) || + ipip_entry->parms4.link != new_parms.link) { update_tunnel = true; } else if (!mlxsw_sp_l3addr_eq(&new_daddr, &old_daddr)) { update_nhs = true; - } else if (mlxsw_sp_ipip_parms_ikey(ipip_entry->parms) != - mlxsw_sp_ipip_parms_ikey(new_parms)) { + } else if (mlxsw_sp_ipip_parms4_ikey(ipip_entry->parms4) != + mlxsw_sp_ipip_parms4_ikey(new_parms)) { update_decap = true; } @@ -350,7 +319,7 @@ mlxsw_sp_ipip_ol_netdev_change_gre4(struct mlxsw_sp *mlxsw_sp, false, false, false, extack); - ipip_entry->parms = new_parms; + ipip_entry->parms4 = new_parms; return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 04b08d9d76e9..a4ff5737eccc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -37,9 +37,10 @@ #include "spectrum_router.h" #include <net/ip_fib.h> +#include <linux/if_tunnel.h> struct ip_tunnel_parm -mlxsw_sp_ipip_netdev_parms(const struct net_device *ol_dev); +mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev); union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, @@ -56,7 +57,9 @@ struct mlxsw_sp_ipip_entry { struct mlxsw_sp_rif_ipip_lb *ol_lb; struct mlxsw_sp_fib_entry *decap_fib_entry; struct list_head ipip_list_node; - struct ip_tunnel_parm parms; + union { + struct ip_tunnel_parm parms4; + }; }; struct mlxsw_sp_ipip_ops { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f7948e983637..05146970c19c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1,10 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c - * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> - * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -796,7 +796,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) { - NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers"); + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers"); return ERR_PTR(-EBUSY); } fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); @@ -1024,9 +1024,11 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, struct net_device *ol_dev) { + const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_ipip_entry *ret = NULL; + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); if (!ipip_entry) return ERR_PTR(-ENOMEM); @@ -1040,7 +1042,15 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, ipip_entry->ipipt = ipipt; ipip_entry->ol_dev = ol_dev; - ipip_entry->parms = mlxsw_sp_ipip_netdev_parms(ol_dev); + + switch (ipip_ops->ul_proto) { + case MLXSW_SP_L3_PROTO_IPV4: + ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); + break; + 
case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON(1); + break; + } return ipip_entry; @@ -5793,7 +5803,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, } if (err < 0) - NL_SET_ERR_MSG(extack, "spectrum: FIB rules not supported. Aborting offload"); + NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported. Aborting offload"); return err; } @@ -6032,7 +6042,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); if (err) { - NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces"); + NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); goto err_rif_index_alloc; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c new file mode 100644 index 000000000000..c3bec37d71ed --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -0,0 +1,356 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/list.h> + +#include "spectrum.h" +#include "spectrum_span.h" + +int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN)) + return -EIO; + + mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_SPAN); + mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count, + sizeof(struct mlxsw_sp_span_entry), + GFP_KERNEL); + if (!mlxsw_sp->span.entries) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) + INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); + + return 0; +} + +void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + WARN_ON_ONCE(!list_empty(&curr->bound_ports_list)); + } + kfree(mlxsw_sp->span.entries); +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + u8 local_port = port->local_port; + int index; + int i; + int err; + + /* find a 
free entry to use */ + index = -1; + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + if (!mlxsw_sp->span.entries[i].ref_count) { + index = i; + span_entry = &mlxsw_sp->span.entries[i]; + break; + } + } + if (index < 0) + return NULL; + + /* create a new port analayzer entry for local_port */ + mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + if (err) + return NULL; + + span_entry->id = index; + span_entry->ref_count = 1; + span_entry->local_port = local_port; + return span_entry; +} + +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + u8 local_port = span_entry->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + if (curr->ref_count && curr->local_port == local_port) + return curr; + } + return NULL; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, + port->local_port); + if (span_entry) { + /* Already exists, just take a reference */ + span_entry->ref_count++; + return span_entry; + } + + return mlxsw_sp_span_entry_create(port); +} + +static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + WARN_ON(!span_entry->ref_count); + if (--span_entry->ref_count == 0) + mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + return 0; +} + +static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) +{ + struct mlxsw_sp *mlxsw_sp = 
port->mlxsw_sp; + struct mlxsw_sp_span_inspected_port *p; + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + list_for_each_entry(p, &curr->bound_ports_list, list) + if (p->local_port == port->local_port && + p->type == MLXSW_SP_SPAN_EGRESS) + return true; + } + + return false; +} + +static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, + int mtu) +{ + return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; +} + +int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int err; + + /* If port is egress mirrored, the shared buffer size should be + * updated according to the mtu value + */ + if (mlxsw_sp_span_is_egress_mirror(port)) { + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); + return err; + } + } + + return 0; +} + +static struct mlxsw_sp_span_inspected_port * +mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry) +{ + struct mlxsw_sp_span_inspected_port *p; + + list_for_each_entry(p, &span_entry->bound_ports_list, list) + if (port->local_port == p->local_port) + return p; + return NULL; +} + +static int +mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char mpar_pl[MLXSW_REG_MPAR_LEN]; + int pa_id = span_entry->id; + + /* bind the port to the SPAN entry */ + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e)type, bind, pa_id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); +} + +static 
int +mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + int err; + + /* if it is an egress SPAN, bind a shared buffer to it */ + if (type == MLXSW_SP_SPAN_EGRESS) { + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, + port->dev->mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + if (err) { + netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); + return err; + } + } + + if (bind) { + err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + true); + if (err) + goto err_port_bind; + } + + inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL); + if (!inspected_port) { + err = -ENOMEM; + goto err_inspected_port_alloc; + } + inspected_port->local_port = port->local_port; + inspected_port->type = type; + list_add_tail(&inspected_port->list, &span_entry->bound_ports_list); + + return 0; + +err_inspected_port_alloc: + if (bind) + mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + false); +err_port_bind: + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + return err; +} + +static void +mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, + struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_sp_span_type type, + bool bind) +{ + struct mlxsw_sp_span_inspected_port *inspected_port; + struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; + char sbib_pl[MLXSW_REG_SBIB_LEN]; + + inspected_port = mlxsw_sp_span_entry_bound_port_find(port, span_entry); + if (!inspected_port) + return; + + if (bind) + mlxsw_sp_span_inspected_port_bind(port, span_entry, type, + false); + /* remove the SBIB buffer if it was egress 
SPAN */ + if (type == MLXSW_SP_SPAN_EGRESS) { + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); + } + + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + + list_del(&inspected_port->list); + kfree(inspected_port); +} + +int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type, bool bind) +{ + struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; + struct mlxsw_sp_span_entry *span_entry; + int err; + + span_entry = mlxsw_sp_span_entry_get(to); + if (!span_entry) + return -ENOENT; + + netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n", + span_entry->id); + + err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind); + if (err) + goto err_port_bind; + + return 0; + +err_port_bind: + mlxsw_sp_span_entry_put(mlxsw_sp, span_entry); + return err; +} + +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, + enum mlxsw_sp_span_type type, bool bind) +{ + struct mlxsw_sp_span_entry *span_entry; + + span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, + destination_port); + if (!span_entry) { + netdev_err(from->dev, "no span entry found\n"); + return; + } + + netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n", + span_entry->id); + mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h new file mode 100644 index 000000000000..069050e385ff --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -0,0 +1,73 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.h + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MLXSW_SPECTRUM_SPAN_H +#define _MLXSW_SPECTRUM_SPAN_H + +#include <linux/types.h> + +struct mlxsw_sp; +struct mlxsw_sp_port; + +enum mlxsw_sp_span_type { + MLXSW_SP_SPAN_EGRESS, + MLXSW_SP_SPAN_INGRESS +}; + +struct mlxsw_sp_span_inspected_port { + struct list_head list; + enum mlxsw_sp_span_type type; + u8 local_port; +}; + +struct mlxsw_sp_span_entry { + u8 local_port; + struct list_head bound_ports_list; + int ref_count; + int id; +}; + +int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp); + +int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, + struct mlxsw_sp_port *to, + enum mlxsw_sp_span_type type, bool bind); +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, + enum mlxsw_sp_span_type type, bool bind); +struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port); + +int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 593ad31be749..f9f53af04fe1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1819,7 +1819,7 @@ mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; if (is_vlan_dev(bridge_port->dev)) { - NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge"); + NL_SET_ERR_MSG_MOD(extack, "Can not enslave a VLAN device to a VLAN-aware bridge"); return -EINVAL; } @@ -1885,7 +1885,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, u16 vid; if (!is_vlan_dev(bridge_port->dev)) { - NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge"); + NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a 
VLAN-unaware bridge"); return -EINVAL; } vid = vlan_dev_vlan_id(bridge_port->dev); @@ -1895,7 +1895,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, return -EINVAL; if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) { - NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port"); + NL_SET_ERR_MSG_MOD(extack, "Can not bridge VLAN uppers of the same port"); return -EINVAL; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index adfe474c2cf0..28c1cd5b823b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -61,6 +61,13 @@ #define NFP_FLOWER_MASK_MPLS_BOS BIT(8) #define NFP_FLOWER_MASK_MPLS_Q BIT(0) +/* Compressed HW representation of TCP Flags */ +#define NFP_FL_TCP_FLAG_URG BIT(4) +#define NFP_FL_TCP_FLAG_PSH BIT(3) +#define NFP_FL_TCP_FLAG_RST BIT(2) +#define NFP_FL_TCP_FLAG_SYN BIT(1) +#define NFP_FL_TCP_FLAG_FIN BIT(0) + #define NFP_FL_SC_ACT_DROP 0x80000000 #define NFP_FL_SC_ACT_USER 0x7D000000 #define NFP_FL_SC_ACT_POPV 0x6A000000 @@ -257,7 +264,7 @@ struct nfp_flower_tp_ports { * 3 2 1 * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - * | DSCP |ECN| protocol | reserved | + * | DSCP |ECN| protocol | ttl | flags | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | ipv4_addr_src | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -268,7 +275,7 @@ struct nfp_flower_ipv4 { u8 tos; u8 proto; u8 ttl; - u8 reserved; + u8 flags; __be32 ipv4_src; __be32 ipv4_dst; }; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 332ff0fdc038..c5cebf6fb1d3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -41,6 +41,7 @@ 
#include <linux/time64.h> #include <linux/types.h> #include <net/pkt_cls.h> +#include <net/tcp.h> #include <linux/workqueue.h> struct net_device; diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 37c2ecae2a7a..b3bc8279d4fb 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -181,6 +181,26 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame, frame->tos = flow_ip->tos; frame->ttl = flow_ip->ttl; } + + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_dissector_key_tcp *tcp; + u32 tcp_flags; + + tcp = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_TCP, target); + tcp_flags = be16_to_cpu(tcp->flags); + + if (tcp_flags & TCPHDR_FIN) + frame->flags |= NFP_FL_TCP_FLAG_FIN; + if (tcp_flags & TCPHDR_SYN) + frame->flags |= NFP_FL_TCP_FLAG_SYN; + if (tcp_flags & TCPHDR_RST) + frame->flags |= NFP_FL_TCP_FLAG_RST; + if (tcp_flags & TCPHDR_PSH) + frame->flags |= NFP_FL_TCP_FLAG_PSH; + if (tcp_flags & TCPHDR_URG) + frame->flags |= NFP_FL_TCP_FLAG_URG; + } } static void diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index eb5c13dea8f5..f3586c519805 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -44,11 +44,16 @@ #include "../nfp_net.h" #include "../nfp_port.h" +#define NFP_FLOWER_SUPPORTED_TCPFLAGS \ + (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST | \ + TCPHDR_PSH | TCPHDR_URG) + #define NFP_FLOWER_WHITELIST_DISSECTOR \ (BIT(FLOW_DISSECTOR_KEY_CONTROL) | \ BIT(FLOW_DISSECTOR_KEY_BASIC) | \ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | \ + BIT(FLOW_DISSECTOR_KEY_TCP) | \ BIT(FLOW_DISSECTOR_KEY_PORTS) | \ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \ BIT(FLOW_DISSECTOR_KEY_VLAN) | \ @@ -288,6 +293,35 @@ 
nfp_flower_calculate_key_layers(struct nfp_app *app, } } + if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_dissector_key_tcp *tcp; + u32 tcp_flags; + + tcp = skb_flow_dissector_target(flow->dissector, + FLOW_DISSECTOR_KEY_TCP, + flow->key); + tcp_flags = be16_to_cpu(tcp->flags); + + if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) + return -EOPNOTSUPP; + + /* We only support PSH and URG flags when either + * FIN, SYN or RST is present as well. + */ + if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) && + !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) + return -EOPNOTSUPP; + + /* We need to store TCP flags in the IPv4 key space, thus + * we need to ensure we include a IPv4 key layer if we have + * not done so already. + */ + if (!(key_layer & NFP_FLOWER_LAYER_IPV4)) { + key_layer |= NFP_FLOWER_LAYER_IPV4; + key_size += sizeof(struct nfp_flower_ipv4); + } + } + ret_key_ls->key_layer = key_layer; ret_key_ls->key_layer_two = key_layer_two; ret_key_ls->key_size = key_size; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 4499a7333078..bb63c115537d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2017 Netronome Systems, Inc. + * Copyright (C) 2015-2018 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -51,12 +51,12 @@ * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. 
*/ -#define NFP_NET_CFG_BAR_SZ (32 * 1024) +#define NFP_NET_CFG_BAR_SZ (32 * 1024) /** * Offset in Freelist buffer where packet starts on RX */ -#define NFP_NET_RX_OFFSET 32 +#define NFP_NET_RX_OFFSET 32 /** * LSO parameters @@ -75,65 +75,65 @@ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ -#define NFP_META_PORT_ID_CTRL ~0U +#define NFP_META_PORT_ID_CTRL ~0U /** * Hash type pre-pended when a RSS hash was computed */ -#define NFP_NET_RSS_NONE 0 -#define NFP_NET_RSS_IPV4 1 -#define NFP_NET_RSS_IPV6 2 -#define NFP_NET_RSS_IPV6_EX 3 -#define NFP_NET_RSS_IPV4_TCP 4 -#define NFP_NET_RSS_IPV6_TCP 5 -#define NFP_NET_RSS_IPV6_EX_TCP 6 -#define NFP_NET_RSS_IPV4_UDP 7 -#define NFP_NET_RSS_IPV6_UDP 8 -#define NFP_NET_RSS_IPV6_EX_UDP 9 +#define NFP_NET_RSS_NONE 0 +#define NFP_NET_RSS_IPV4 1 +#define NFP_NET_RSS_IPV6 2 +#define NFP_NET_RSS_IPV6_EX 3 +#define NFP_NET_RSS_IPV4_TCP 4 +#define NFP_NET_RSS_IPV6_TCP 5 +#define NFP_NET_RSS_IPV6_EX_TCP 6 +#define NFP_NET_RSS_IPV4_UDP 7 +#define NFP_NET_RSS_IPV6_UDP 8 +#define NFP_NET_RSS_IPV6_EX_UDP 9 /** * Ring counts - * %NFP_NET_TXR_MAX: Maximum number of TX rings - * %NFP_NET_RXR_MAX: Maximum number of RX rings + * %NFP_NET_TXR_MAX: Maximum number of TX rings + * %NFP_NET_RXR_MAX: Maximum number of RX rings */ -#define NFP_NET_TXR_MAX 64 -#define NFP_NET_RXR_MAX 64 +#define NFP_NET_TXR_MAX 64 +#define NFP_NET_RXR_MAX 64 /** * Read/Write config words (0x0000 - 0x002c) - * %NFP_NET_CFG_CTRL: Global control + * %NFP_NET_CFG_CTRL: Global control * %NFP_NET_CFG_UPDATE: Indicate which fields are updated * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings - * %NFP_NET_CFG_MTU: Set MTU size + * %NFP_NET_CFG_MTU: Set MTU size * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) - * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions - * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * %NFP_NET_CFG_EXN: MSI-X 
table entry for exceptions + * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes * %NFP_NET_CFG_MACADDR: MAC address * * TODO: * - define Error details in UPDATE */ -#define NFP_NET_CFG_CTRL 0x0000 -#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ -#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ -#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ -#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ -#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ -#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ -#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ -#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ -#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ -#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ -#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ +#define NFP_NET_CFG_CTRL 0x0000 +#define NFP_NET_CFG_CTRL_ENABLE (0x1 << 0) /* Global enable */ +#define NFP_NET_CFG_CTRL_PROMISC (0x1 << 1) /* Enable Promisc mode */ +#define NFP_NET_CFG_CTRL_L2BC (0x1 << 2) /* Allow L2 Broadcast */ +#define NFP_NET_CFG_CTRL_L2MC (0x1 << 3) /* Allow L2 Multicast */ +#define NFP_NET_CFG_CTRL_RXCSUM (0x1 << 4) /* Enable RX Checksum */ +#define NFP_NET_CFG_CTRL_TXCSUM (0x1 << 5) /* Enable TX Checksum */ +#define NFP_NET_CFG_CTRL_RXVLAN (0x1 << 6) /* Enable VLAN strip */ +#define NFP_NET_CFG_CTRL_TXVLAN (0x1 << 7) /* Enable VLAN insert */ +#define NFP_NET_CFG_CTRL_SCATTER (0x1 << 8) /* Scatter DMA */ +#define NFP_NET_CFG_CTRL_GATHER (0x1 << 9) /* Gather DMA */ +#define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* LSO/TSO (version 1) */ #define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */ -#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ +#define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ #define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */ 
-#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ -#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ -#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ -#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ -#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ +#define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ +#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ +#define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ +#define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_L2SWITCH (0x1 << 22) /* L2 Switch */ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ @@ -152,35 +152,35 @@ #define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \ NFP_NET_CFG_CTRL_CSUM_COMPLETE) -#define NFP_NET_CFG_UPDATE 0x0004 -#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ -#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ -#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ -#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ -#define NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ -#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ -#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ -#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ -#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ +#define NFP_NET_CFG_UPDATE 0x0004 +#define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ +#define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ +#define NFP_NET_CFG_UPDATE_RSS (0x1 << 2) /* RSS config change */ +#define NFP_NET_CFG_UPDATE_TXRPRIO (0x1 << 3) /* TX Ring prio change */ +#define 
NFP_NET_CFG_UPDATE_RXRPRIO (0x1 << 4) /* RX Ring prio change */ +#define NFP_NET_CFG_UPDATE_MSIX (0x1 << 5) /* MSI-X change */ +#define NFP_NET_CFG_UPDATE_L2SWITCH (0x1 << 6) /* Switch changes */ +#define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ +#define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ #define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */ #define NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */ #define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */ -#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ -#define NFP_NET_CFG_TXRS_ENABLE 0x0008 -#define NFP_NET_CFG_RXRS_ENABLE 0x0010 -#define NFP_NET_CFG_MTU 0x0018 -#define NFP_NET_CFG_FLBUFSZ 0x001c -#define NFP_NET_CFG_EXN 0x001f -#define NFP_NET_CFG_LSC 0x0020 -#define NFP_NET_CFG_MACADDR 0x0024 +#define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ +#define NFP_NET_CFG_TXRS_ENABLE 0x0008 +#define NFP_NET_CFG_RXRS_ENABLE 0x0010 +#define NFP_NET_CFG_MTU 0x0018 +#define NFP_NET_CFG_FLBUFSZ 0x001c +#define NFP_NET_CFG_EXN 0x001f +#define NFP_NET_CFG_LSC 0x0020 +#define NFP_NET_CFG_MACADDR 0x0024 /** * Read-only words (0x0030 - 0x0050): * %NFP_NET_CFG_VERSION: Firmware version number - * %NFP_NET_CFG_STS: Status - * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) + * %NFP_NET_CFG_STS: Status + * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings * %NFP_NET_CFG_MAX_MTU: Maximum support MTU @@ -190,37 +190,37 @@ * TODO: * - define more STS bits */ -#define NFP_NET_CFG_VERSION 0x0030 +#define NFP_NET_CFG_VERSION 0x0030 #define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) #define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) -#define 
NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) +#define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) #define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 #define NFP_NET_CFG_VERSION_MAJOR_MASK (0xff << 8) -#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) +#define NFP_NET_CFG_VERSION_MAJOR(x) (((x) & 0xff) << 8) #define NFP_NET_CFG_VERSION_MINOR_MASK (0xff << 0) -#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) -#define NFP_NET_CFG_STS 0x0034 -#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ +#define NFP_NET_CFG_VERSION_MINOR(x) (((x) & 0xff) << 0) +#define NFP_NET_CFG_STS 0x0034 +#define NFP_NET_CFG_STS_LINK (0x1 << 0) /* Link up or down */ /* Link rate */ #define NFP_NET_CFG_STS_LINK_RATE_SHIFT 1 #define NFP_NET_CFG_STS_LINK_RATE_MASK 0xF -#define NFP_NET_CFG_STS_LINK_RATE \ +#define NFP_NET_CFG_STS_LINK_RATE \ (NFP_NET_CFG_STS_LINK_RATE_MASK << NFP_NET_CFG_STS_LINK_RATE_SHIFT) #define NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED 0 -#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1 -#define NFP_NET_CFG_STS_LINK_RATE_1G 2 -#define NFP_NET_CFG_STS_LINK_RATE_10G 3 -#define NFP_NET_CFG_STS_LINK_RATE_25G 4 -#define NFP_NET_CFG_STS_LINK_RATE_40G 5 -#define NFP_NET_CFG_STS_LINK_RATE_50G 6 -#define NFP_NET_CFG_STS_LINK_RATE_100G 7 -#define NFP_NET_CFG_CAP 0x0038 -#define NFP_NET_CFG_MAX_TXRINGS 0x003c -#define NFP_NET_CFG_MAX_RXRINGS 0x0040 -#define NFP_NET_CFG_MAX_MTU 0x0044 +#define NFP_NET_CFG_STS_LINK_RATE_UNKNOWN 1 +#define NFP_NET_CFG_STS_LINK_RATE_1G 2 +#define NFP_NET_CFG_STS_LINK_RATE_10G 3 +#define NFP_NET_CFG_STS_LINK_RATE_25G 4 +#define NFP_NET_CFG_STS_LINK_RATE_40G 5 +#define NFP_NET_CFG_STS_LINK_RATE_50G 6 +#define NFP_NET_CFG_STS_LINK_RATE_100G 7 +#define NFP_NET_CFG_CAP 0x0038 +#define NFP_NET_CFG_MAX_TXRINGS 0x003c +#define NFP_NET_CFG_MAX_RXRINGS 0x0040 +#define NFP_NET_CFG_MAX_MTU 0x0044 /* Next two words are being used by VFs for solving THB350 issue */ -#define NFP_NET_CFG_START_TXQ 0x0048 -#define NFP_NET_CFG_START_RXQ 0x004c +#define 
NFP_NET_CFG_START_TXQ 0x0048 +#define NFP_NET_CFG_START_RXQ 0x004c /** * Prepend configuration @@ -280,8 +280,8 @@ /** * 40B reserved for future use (0x0098 - 0x00c0) */ -#define NFP_NET_CFG_RESERVED 0x0098 -#define NFP_NET_CFG_RESERVED_SZ 0x0028 +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -290,26 +290,26 @@ * %NFP_NET_CFG_RSS_KEY: RSS "secret" key * %NFP_NET_CFG_RSS_ITBL: RSS indirection table */ -#define NFP_NET_CFG_RSS_BASE 0x0100 -#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE -#define NFP_NET_CFG_RSS_MASK (0x7f) -#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) -#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ -#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ -#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ -#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ -#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ -#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ +#define NFP_NET_CFG_RSS_BASE 0x0100 +#define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE +#define NFP_NET_CFG_RSS_MASK (0x7f) +#define NFP_NET_CFG_RSS_MASK_of(_x) ((_x) & 0x7f) +#define NFP_NET_CFG_RSS_IPV4 (1 << 8) /* RSS for IPv4 */ +#define NFP_NET_CFG_RSS_IPV6 (1 << 9) /* RSS for IPv6 */ +#define NFP_NET_CFG_RSS_IPV4_TCP (1 << 10) /* RSS for IPv4/TCP */ +#define NFP_NET_CFG_RSS_IPV4_UDP (1 << 11) /* RSS for IPv4/UDP */ +#define NFP_NET_CFG_RSS_IPV6_TCP (1 << 12) /* RSS for IPv6/TCP */ +#define NFP_NET_CFG_RSS_IPV6_UDP (1 << 13) /* RSS for IPv6/UDP */ #define NFP_NET_CFG_RSS_HFUNC 0xff000000 -#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ +#define NFP_NET_CFG_RSS_TOEPLITZ (1 << 24) /* Use Toeplitz hash */ #define NFP_NET_CFG_RSS_XOR (1 << 25) /* Use XOR as hash */ #define NFP_NET_CFG_RSS_CRC32 (1 << 26) /* Use CRC32 as hash */ #define NFP_NET_CFG_RSS_HFUNCS 3 -#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) 
-#define NFP_NET_CFG_RSS_KEY_SZ 0x28 -#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ +#define NFP_NET_CFG_RSS_KEY (NFP_NET_CFG_RSS_BASE + 0x4) +#define NFP_NET_CFG_RSS_KEY_SZ 0x28 +#define NFP_NET_CFG_RSS_ITBL (NFP_NET_CFG_RSS_BASE + 0x4 + \ NFP_NET_CFG_RSS_KEY_SZ) -#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 +#define NFP_NET_CFG_RSS_ITBL_SZ 0x80 /** * TX ring configuration (0x200 - 0x800) @@ -321,13 +321,13 @@ * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet */ -#define NFP_NET_CFG_TXR_BASE 0x0200 -#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) -#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ +#define NFP_NET_CFG_TXR_BASE 0x0200 +#define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_TXR_WB_ADDR(_x) (NFP_NET_CFG_TXR_BASE + 0x200 + \ ((_x) * 0x8)) -#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) -#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) -#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) +#define NFP_NET_CFG_TXR_SZ(_x) (NFP_NET_CFG_TXR_BASE + 0x400 + (_x)) +#define NFP_NET_CFG_TXR_VEC(_x) (NFP_NET_CFG_TXR_BASE + 0x440 + (_x)) +#define NFP_NET_CFG_TXR_PRIO(_x) (NFP_NET_CFG_TXR_BASE + 0x480 + (_x)) #define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ ((_x) * 0x4)) @@ -340,11 +340,11 @@ * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) */ -#define NFP_NET_CFG_RXR_BASE 0x0800 -#define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) -#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) -#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) -#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) +#define NFP_NET_CFG_RXR_BASE 0x0800 +#define NFP_NET_CFG_RXR_ADDR(_x) 
(NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) +#define NFP_NET_CFG_RXR_SZ(_x) (NFP_NET_CFG_RXR_BASE + 0x200 + (_x)) +#define NFP_NET_CFG_RXR_VEC(_x) (NFP_NET_CFG_RXR_BASE + 0x240 + (_x)) +#define NFP_NET_CFG_RXR_PRIO(_x) (NFP_NET_CFG_RXR_BASE + 0x280 + (_x)) #define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ ((_x) * 0x4)) @@ -358,36 +358,36 @@ * the MSI-X entry and the host driver must clear the register to * re-enable the interrupt. */ -#define NFP_NET_CFG_ICR_BASE 0x0c00 -#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) -#define NFP_NET_CFG_ICR_UNMASKED 0x0 -#define NFP_NET_CFG_ICR_RXTX 0x1 -#define NFP_NET_CFG_ICR_LSC 0x2 +#define NFP_NET_CFG_ICR_BASE 0x0c00 +#define NFP_NET_CFG_ICR(_x) (NFP_NET_CFG_ICR_BASE + (_x)) +#define NFP_NET_CFG_ICR_UNMASKED 0x0 +#define NFP_NET_CFG_ICR_RXTX 0x1 +#define NFP_NET_CFG_ICR_LSC 0x2 /** * General device stats (0x0d00 - 0x0d90) * all counters are 64bit. */ -#define NFP_NET_CFG_STATS_BASE 0x0d00 -#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) -#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) -#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) -#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) -#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) -#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) -#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) -#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) -#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) - -#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) -#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) -#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) -#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) -#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) -#define NFP_NET_CFG_STATS_TX_BC_OCTETS 
(NFP_NET_CFG_STATS_BASE + 0x70) -#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) -#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) -#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_BASE 0x0d00 +#define NFP_NET_CFG_STATS_RX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x00) +#define NFP_NET_CFG_STATS_RX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x08) +#define NFP_NET_CFG_STATS_RX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x10) +#define NFP_NET_CFG_STATS_RX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x18) +#define NFP_NET_CFG_STATS_RX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x20) +#define NFP_NET_CFG_STATS_RX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x28) +#define NFP_NET_CFG_STATS_RX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x30) +#define NFP_NET_CFG_STATS_RX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x38) +#define NFP_NET_CFG_STATS_RX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x40) + +#define NFP_NET_CFG_STATS_TX_DISCARDS (NFP_NET_CFG_STATS_BASE + 0x48) +#define NFP_NET_CFG_STATS_TX_ERRORS (NFP_NET_CFG_STATS_BASE + 0x50) +#define NFP_NET_CFG_STATS_TX_OCTETS (NFP_NET_CFG_STATS_BASE + 0x58) +#define NFP_NET_CFG_STATS_TX_UC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x60) +#define NFP_NET_CFG_STATS_TX_MC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x68) +#define NFP_NET_CFG_STATS_TX_BC_OCTETS (NFP_NET_CFG_STATS_BASE + 0x70) +#define NFP_NET_CFG_STATS_TX_FRAMES (NFP_NET_CFG_STATS_BASE + 0x78) +#define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) +#define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) #define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) #define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) @@ -404,11 +404,11 @@ * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) */ -#define NFP_NET_CFG_TXR_STATS_BASE 0x1000 -#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ +#define 
NFP_NET_CFG_TXR_STATS_BASE 0x1000 +#define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ ((_x) * 0x10)) -#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 -#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ +#define NFP_NET_CFG_RXR_STATS_BASE 0x1400 +#define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ ((_x) * 0x10)) /** @@ -444,7 +444,7 @@ * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV - * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments + * %NFP_NET_CFG_TLV_LENGTH_INC: TLV length increments * %NFP_NET_CFG_TLV_VALUE: Offset of value with the TLV * * List of simple TLV structures, first one starts at %NFP_NET_CFG_TLV_BASE. @@ -457,12 +457,12 @@ * Note that the 4 byte TLV header is not counted in %NFP_NET_CFG_TLV_LENGTH. */ #define NFP_NET_CFG_TLV_TYPE 0x00 -#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000 +#define NFP_NET_CFG_TLV_TYPE_REQUIRED 0x8000 #define NFP_NET_CFG_TLV_LENGTH 0x02 #define NFP_NET_CFG_TLV_LENGTH_INC 4 #define NFP_NET_CFG_TLV_VALUE 0x04 -#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000 +#define NFP_NET_CFG_TLV_HEADER_REQUIRED 0x80000000 #define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 #define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 96a27b00c90e..b81f4faf7b10 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1018,6 +1018,7 @@ struct ravb_private { u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */ u32 cur_tx[NUM_TX_QUEUE]; u32 dirty_tx[NUM_TX_QUEUE]; + u32 rx_buf_sz; /* Based on MTU+slack. */ struct napi_struct napi[NUM_RX_QUEUE]; struct work_struct work; /* MII transceiver section. 
*/ diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index a95fbd5510d9..54a6265da7a0 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -238,7 +238,7 @@ static void ravb_ring_free(struct net_device *ndev, int q) le32_to_cpu(desc->dptr))) dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - PKT_BUF_SZ, + priv->rx_buf_sz, DMA_FROM_DEVICE); } ring_size = sizeof(struct ravb_ex_rx_desc) * @@ -300,9 +300,9 @@ static void ravb_ring_format(struct net_device *ndev, int q) for (i = 0; i < priv->num_rx_ring[q]; i++) { /* RX descriptor */ rx_desc = &priv->rx_ring[q][i]; - rx_desc->ds_cc = cpu_to_le16(PKT_BUF_SZ); + rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz); dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data, - PKT_BUF_SZ, + priv->rx_buf_sz, DMA_FROM_DEVICE); /* We just set the data size to 0 for a failed mapping which * should prevent DMA from happening... @@ -346,6 +346,10 @@ static int ravb_ring_init(struct net_device *ndev, int q) int ring_size; int i; + /* +16 gets room from the status from the card. */ + priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) + + ETH_HLEN + VLAN_HLEN; + /* Allocate RX and TX skb rings */ priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q], sizeof(*priv->rx_skb[q]), GFP_KERNEL); @@ -355,7 +359,7 @@ static int ravb_ring_init(struct net_device *ndev, int q) goto error; for (i = 0; i < priv->num_rx_ring[q]; i++) { - skb = netdev_alloc_skb(ndev, PKT_BUF_SZ + RAVB_ALIGN - 1); + skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1); if (!skb) goto error; ravb_set_buffer_align(skb); @@ -586,7 +590,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) skb = priv->rx_skb[q][entry]; priv->rx_skb[q][entry] = NULL; dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr), - PKT_BUF_SZ, + priv->rx_buf_sz, DMA_FROM_DEVICE); get_ts &= (q == RAVB_NC) ? 
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT : @@ -619,11 +623,12 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q) for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) { entry = priv->dirty_rx[q] % priv->num_rx_ring[q]; desc = &priv->rx_ring[q][entry]; - desc->ds_cc = cpu_to_le16(PKT_BUF_SZ); + desc->ds_cc = cpu_to_le16(priv->rx_buf_sz); if (!priv->rx_skb[q][entry]) { skb = netdev_alloc_skb(ndev, - PKT_BUF_SZ + RAVB_ALIGN - 1); + priv->rx_buf_sz + + RAVB_ALIGN - 1); if (!skb) break; /* Better luck next round. */ ravb_set_buffer_align(skb); @@ -1854,6 +1859,17 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) return phy_mii_ioctl(phydev, req, cmd); } +static int ravb_change_mtu(struct net_device *ndev, int new_mtu) +{ + if (netif_running(ndev)) + return -EBUSY; + + ndev->mtu = new_mtu; + netdev_update_features(ndev); + + return 0; +} + static void ravb_set_rx_csum(struct net_device *ndev, bool enable) { struct ravb_private *priv = netdev_priv(ndev); @@ -1895,6 +1911,7 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_set_rx_mode = ravb_set_rx_mode, .ndo_tx_timeout = ravb_tx_timeout, .ndo_do_ioctl = ravb_do_ioctl, + .ndo_change_mtu = ravb_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_set_features = ravb_set_features, @@ -2117,6 +2134,9 @@ static int ravb_probe(struct platform_device *pdev) goto out_release; } + ndev->max_mtu = 2048 - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN); + ndev->min_mtu = ETH_MIN_MTU; + /* Set function */ ndev->netdev_ops = &ravb_netdev_ops; ndev->ethtool_ops = &ravb_ethtool_ops; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 92dcf8717fc6..d7d5a6d15219 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -961,20 +961,16 @@ static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd) static int sh_eth_check_reset(struct net_device *ndev) { - 
int ret = 0; - int cnt = 100; + int cnt; - while (cnt > 0) { + for (cnt = 100; cnt > 0; cnt--) { if (!(sh_eth_read(ndev, EDMR) & EDMR_SRST_GETHER)) - break; + return 0; mdelay(1); - cnt--; - } - if (cnt <= 0) { - netdev_err(ndev, "Device reset failed\n"); - ret = -ETIMEDOUT; } - return ret; + + netdev_err(ndev, "Device reset failed\n"); + return -ETIMEDOUT; } static int sh_eth_reset(struct net_device *ndev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 5270d26f0bc6..2d5d4aea3bcb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -48,26 +48,18 @@ #define MUX_CLK_NUM_PARENTS 2 struct meson8b_dwmac { - struct platform_device *pdev; - + struct device *dev; void __iomem *regs; - phy_interface_t phy_mode; + struct clk *rgmii_tx_clk; + u32 tx_delay_ns; +}; +struct meson8b_dwmac_clk_configs { struct clk_mux m250_mux; - struct clk *m250_mux_clk; - struct clk *m250_mux_parent[MUX_CLK_NUM_PARENTS]; - struct clk_divider m250_div; - struct clk *m250_div_clk; - struct clk_fixed_factor fixed_div2; - struct clk *fixed_div2_clk; - struct clk_gate rgmii_tx_en; - struct clk *rgmii_tx_en_clk; - - u32 tx_delay_ns; }; static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg, @@ -82,106 +74,99 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg, writel(data, dwmac->regs + reg); } -static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) +static struct clk *meson8b_dwmac_register_clk(struct meson8b_dwmac *dwmac, + const char *name_suffix, + const char **parent_names, + int num_parents, + const struct clk_ops *ops, + struct clk_hw *hw) { struct clk_init_data init; - int i, ret; - struct device *dev = &dwmac->pdev->dev; char clk_name[32]; - const char *clk_div_parents[1]; - const char *mux_parent_names[MUX_CLK_NUM_PARENTS]; + + snprintf(clk_name, sizeof(clk_name), "%s#%s", 
dev_name(dwmac->dev), + name_suffix); + + init.name = clk_name; + init.ops = ops; + init.flags = CLK_SET_RATE_PARENT; + init.parent_names = parent_names; + init.num_parents = num_parents; + + hw->init = &init; + + return devm_clk_register(dwmac->dev, hw); +} + +static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) +{ + int i, ret; + struct clk *clk; + struct device *dev = dwmac->dev; + const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS]; + struct meson8b_dwmac_clk_configs *clk_configs; + + clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL); + if (!clk_configs) + return -ENOMEM; /* get the mux parents from DT */ for (i = 0; i < MUX_CLK_NUM_PARENTS; i++) { char name[16]; snprintf(name, sizeof(name), "clkin%d", i); - dwmac->m250_mux_parent[i] = devm_clk_get(dev, name); - if (IS_ERR(dwmac->m250_mux_parent[i])) { - ret = PTR_ERR(dwmac->m250_mux_parent[i]); + clk = devm_clk_get(dev, name); + if (IS_ERR(clk)) { + ret = PTR_ERR(clk); if (ret != -EPROBE_DEFER) dev_err(dev, "Missing clock %s\n", name); return ret; } - mux_parent_names[i] = - __clk_get_name(dwmac->m250_mux_parent[i]); + mux_parent_names[i] = __clk_get_name(clk); } - /* create the m250_mux */ - snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev)); - init.name = clk_name; - init.ops = &clk_mux_ops; - init.flags = CLK_SET_RATE_PARENT; - init.parent_names = mux_parent_names; - init.num_parents = MUX_CLK_NUM_PARENTS; - - dwmac->m250_mux.reg = dwmac->regs + PRG_ETH0; - dwmac->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; - dwmac->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; - dwmac->m250_mux.flags = 0; - dwmac->m250_mux.table = NULL; - dwmac->m250_mux.hw.init = &init; - - dwmac->m250_mux_clk = devm_clk_register(dev, &dwmac->m250_mux.hw); - if (WARN_ON(IS_ERR(dwmac->m250_mux_clk))) - return PTR_ERR(dwmac->m250_mux_clk); - - /* create the m250_div */ - snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev)); - init.name = devm_kstrdup(dev, clk_name, 
GFP_KERNEL); - init.ops = &clk_divider_ops; - init.flags = CLK_SET_RATE_PARENT; - clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk); - init.parent_names = clk_div_parents; - init.num_parents = ARRAY_SIZE(clk_div_parents); - - dwmac->m250_div.reg = dwmac->regs + PRG_ETH0; - dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT; - dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH; - dwmac->m250_div.hw.init = &init; - dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED | + clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0; + clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; + clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; + clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parent_names, + MUX_CLK_NUM_PARENTS, &clk_mux_ops, + &clk_configs->m250_mux.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + parent_name = __clk_get_name(clk); + clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0; + clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT; + clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH; + clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO | CLK_DIVIDER_ROUND_CLOSEST; - - dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw); - if (WARN_ON(IS_ERR(dwmac->m250_div_clk))) - return PTR_ERR(dwmac->m250_div_clk); - - /* create the fixed_div2 */ - snprintf(clk_name, sizeof(clk_name), "%s#fixed_div2", dev_name(dev)); - init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL); - init.ops = &clk_fixed_factor_ops; - init.flags = CLK_SET_RATE_PARENT; - clk_div_parents[0] = __clk_get_name(dwmac->m250_div_clk); - init.parent_names = clk_div_parents; - init.num_parents = ARRAY_SIZE(clk_div_parents); - - dwmac->fixed_div2.mult = 1; - dwmac->fixed_div2.div = 2; - dwmac->fixed_div2.hw.init = &init; - - dwmac->fixed_div2_clk = devm_clk_register(dev, &dwmac->fixed_div2.hw); - if (WARN_ON(IS_ERR(dwmac->fixed_div2_clk))) - return PTR_ERR(dwmac->fixed_div2_clk); - - /* create the rgmii_tx_en */ - init.name 
= devm_kasprintf(dev, GFP_KERNEL, "%s#rgmii_tx_en", - dev_name(dev)); - init.ops = &clk_gate_ops; - init.flags = CLK_SET_RATE_PARENT; - clk_div_parents[0] = __clk_get_name(dwmac->fixed_div2_clk); - init.parent_names = clk_div_parents; - init.num_parents = ARRAY_SIZE(clk_div_parents); - - dwmac->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0; - dwmac->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN; - dwmac->rgmii_tx_en.hw.init = &init; - - dwmac->rgmii_tx_en_clk = devm_clk_register(dev, - &dwmac->rgmii_tx_en.hw); - if (WARN_ON(IS_ERR(dwmac->rgmii_tx_en_clk))) - return PTR_ERR(dwmac->rgmii_tx_en_clk); + clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1, + &clk_divider_ops, + &clk_configs->m250_div.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + parent_name = __clk_get_name(clk); + clk_configs->fixed_div2.mult = 1; + clk_configs->fixed_div2.div = 2; + clk = meson8b_dwmac_register_clk(dwmac, "fixed_div2", &parent_name, 1, + &clk_fixed_factor_ops, + &clk_configs->fixed_div2.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + parent_name = __clk_get_name(clk); + clk_configs->rgmii_tx_en.reg = dwmac->regs + PRG_ETH0; + clk_configs->rgmii_tx_en.bit_idx = PRG_ETH0_RGMII_TX_CLK_EN; + clk = meson8b_dwmac_register_clk(dwmac, "rgmii_tx_en", &parent_name, 1, + &clk_gate_ops, + &clk_configs->rgmii_tx_en.hw); + if (WARN_ON(IS_ERR(clk))) + return PTR_ERR(clk); + + dwmac->rgmii_tx_clk = clk; return 0; } @@ -219,19 +204,23 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) * a register) based on the line-speed (125MHz for Gbit speeds, * 25MHz for 100Mbit/s and 2.5MHz for 10Mbit/s). 
*/ - ret = clk_set_rate(dwmac->rgmii_tx_en_clk, 125 * 1000 * 1000); + ret = clk_set_rate(dwmac->rgmii_tx_clk, 125 * 1000 * 1000); if (ret) { - dev_err(&dwmac->pdev->dev, + dev_err(dwmac->dev, "failed to set RGMII TX clock\n"); return ret; } - ret = clk_prepare_enable(dwmac->rgmii_tx_en_clk); + ret = clk_prepare_enable(dwmac->rgmii_tx_clk); if (ret) { - dev_err(&dwmac->pdev->dev, + dev_err(dwmac->dev, "failed to enable the RGMII TX clock\n"); return ret; } + + devm_add_action_or_reset(dwmac->dev, + (void(*)(void *))clk_disable_unprepare, + dwmac->rgmii_tx_clk); break; case PHY_INTERFACE_MODE_RMII: @@ -251,7 +240,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) break; default: - dev_err(&dwmac->pdev->dev, "unsupported phy-mode %s\n", + dev_err(dwmac->dev, "unsupported phy-mode %s\n", phy_modes(dwmac->phy_mode)); return -EINVAL; } @@ -292,7 +281,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } - dwmac->pdev = pdev; + dwmac->dev = &pdev->dev; dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node); if (dwmac->phy_mode < 0) { dev_err(&pdev->dev, "missing phy-mode property\n"); @@ -317,29 +306,16 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - goto err_clk_disable; + goto err_remove_config_dt; return 0; -err_clk_disable: - if (phy_interface_mode_is_rgmii(dwmac->phy_mode)) - clk_disable_unprepare(dwmac->rgmii_tx_en_clk); err_remove_config_dt: stmmac_remove_config_dt(pdev, plat_dat); return ret; } -static int meson8b_dwmac_remove(struct platform_device *pdev) -{ - struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - - if (phy_interface_mode_is_rgmii(dwmac->phy_mode)) - clk_disable_unprepare(dwmac->rgmii_tx_en_clk); - - return stmmac_pltfr_remove(pdev); -} - static const struct of_device_id meson8b_dwmac_match[] = { { .compatible = "amlogic,meson8b-dwmac" }, { .compatible = "amlogic,meson-gxbb-dwmac" }, @@ -349,7 
+325,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match); static struct platform_driver meson8b_dwmac_driver = { .probe = meson8b_dwmac_probe, - .remove = meson8b_dwmac_remove, + .remove = stmmac_pltfr_remove, .driver = { .name = "meson8b-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 30612497643c..b97a907ea5aa 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -230,4 +230,5 @@ out: /* Registered in net/core/dev.c */ struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, + .async = true, }; diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index ab58224f897f..b3935778b19f 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -75,6 +75,8 @@ #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f +#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8) +#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8 /* CFG4 bits */ #define DP83867_CFG4_PORT_MIRROR_EN BIT(0) @@ -92,6 +94,7 @@ struct dp83867_private { int io_impedance; int port_mirroring; bool rxctrl_strap_quirk; + int clk_output_sel; }; static int dp83867_ack_interrupt(struct phy_device *phydev) @@ -160,6 +163,14 @@ static int dp83867_of_init(struct phy_device *phydev) dp83867->io_impedance = -EINVAL; /* Optional configuration */ + ret = of_property_read_u32(of_node, "ti,clk-output-sel", + &dp83867->clk_output_sel); + if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK) + /* Keep the default value if ti,clk-output-sel is not set + * or too high + */ + dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK; + if (of_property_read_bool(of_node, "ti,max-output-impedance")) dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX; else if (of_property_read_bool(of_node, "ti,min-output-impedance")) @@ -295,6 +306,14 @@ static int dp83867_config_init(struct phy_device *phydev) if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP) 
dp83867_config_port_mirroring(phydev); + /* Clock output selection if muxing property is set */ + if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK) { + val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG); + val &= ~DP83867_IO_MUX_CFG_CLK_O_SEL_MASK; + val |= (dp83867->clk_output_sel << DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT); + phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG, val); + } + return 0; } diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 5aa59f41bf8c..bd89d1c559ce 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -714,7 +714,7 @@ err_put: } static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) + int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; @@ -726,9 +726,7 @@ static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sp, len); - *usockaddr_len = len; - - return 0; + return len; } static int pppoe_ioctl(struct socket *sock, unsigned int cmd, diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 6dde9a0cfe76..8249d46a7844 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -483,7 +483,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, } static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, - int *usockaddr_len, int peer) + int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; @@ -496,9 +496,7 @@ static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sp, len); - *usockaddr_len = len; - - return 0; + return len; } static int pptp_release(struct socket *sock) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b52258c327d2..d531954512c7 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -78,6 +78,7 @@ #include <linux/mutex.h> #include <linux/uaccess.h> +#include <linux/proc_fs.h> /* Uncomment to enable debugging */ /* #define TUN_DEBUG 1 */ @@ -2286,11 
+2287,67 @@ static int tun_validate(struct nlattr *tb[], struct nlattr *data[], return -EINVAL; } +static size_t tun_get_size(const struct net_device *dev) +{ + BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t)); + BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t)); + + return nla_total_size(sizeof(uid_t)) + /* OWNER */ + nla_total_size(sizeof(gid_t)) + /* GROUP */ + nla_total_size(sizeof(u8)) + /* TYPE */ + nla_total_size(sizeof(u8)) + /* PI */ + nla_total_size(sizeof(u8)) + /* VNET_HDR */ + nla_total_size(sizeof(u8)) + /* PERSIST */ + nla_total_size(sizeof(u8)) + /* MULTI_QUEUE */ + nla_total_size(sizeof(u32)) + /* NUM_QUEUES */ + nla_total_size(sizeof(u32)) + /* NUM_DISABLED_QUEUES */ + 0; +} + +static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + + if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK)) + goto nla_put_failure; + if (uid_valid(tun->owner) && + nla_put_u32(skb, IFLA_TUN_OWNER, + from_kuid_munged(current_user_ns(), tun->owner))) + goto nla_put_failure; + if (gid_valid(tun->group) && + nla_put_u32(skb, IFLA_TUN_GROUP, + from_kgid_munged(current_user_ns(), tun->group))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST))) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE, + !!(tun->flags & IFF_MULTI_QUEUE))) + goto nla_put_failure; + if (tun->flags & IFF_MULTI_QUEUE) { + if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues)) + goto nla_put_failure; + if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES, + tun->numdisabled)) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops tun_link_ops __read_mostly = { .kind = DRV_NAME, .priv_size = sizeof(struct tun_struct), .setup = tun_setup, 
.validate = tun_validate, + .get_size = tun_get_size, + .fill_info = tun_fill_info, }; static void tun_sock_write_space(struct sock *sk) @@ -2789,6 +2846,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, struct tun_struct *tun; void __user* argp = (void __user*)arg; struct ifreq ifr; + struct net *net; kuid_t owner; kgid_t group; int sndbuf; @@ -2797,7 +2855,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int le; int ret; - if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) { + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || + (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { @@ -2817,6 +2876,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, rtnl_lock(); tun = tun_get(tfile); + net = sock_net(&tfile->sk); if (cmd == TUNSETIFF) { ret = -EEXIST; if (tun) @@ -2824,7 +2884,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ifr.ifr_name[IFNAMSIZ-1] = '\0'; - ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr); + ret = tun_set_iff(net, file, &ifr); if (ret) goto unlock; @@ -2846,6 +2906,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, tfile->ifindex = ifindex; goto unlock; } + if (cmd == SIOCGSKNS) { + ret = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto unlock; + + ret = open_related_ns(&net->ns, get_net_ns); + goto unlock; + } ret = -EBADFD; if (!tun) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 139c61c8244a..239c78c53e58 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -736,7 +736,6 @@ static int vrf_rtable_create(struct net_device *dev) return -ENOMEM; rth->dst.output = vrf_output; - rth->rt_table_id = vrf->tb_id; rcu_assign_pointer(vrf->rth, rth); |