diff options
author | David S. Miller <davem@davemloft.net> | 2018-12-07 15:57:02 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-12-07 15:57:02 -0800 |
commit | 12edfdfc79860f42fa493f81518e040376b6a5bc (patch) | |
tree | 4deed43287cdaa618d43380586064b51b8e28306 | |
parent | 9f4c2cffd08c102107992cd01c12c4a53475baf6 (diff) | |
parent | 630ba007f4750722bc56dfebfaf1c0316c2fcb69 (diff) | |
download | linux-12edfdfc79860f42fa493f81518e040376b6a5bc.tar.bz2 |
Merge branch 'hns3-error-handling'
Salil Mehta says:
====================
net: hns3: Additions/optimizations related to HNS3 H/W err handling
This patch set primarily does following addtions and optimizations
related to error handling in HNS3 Ethernet driver:
1. Name changes for enable and process functions and minor loop
optimizations. [PATCH 1-6]
2. Modify query and clearing of RAS errors using new set of commands
because modules specific commands for clearing RCB PPP PF, SSU are
obselete. [PATCH 7]
3. Deletes logging 1-bit errors for RAS in HNS3 driver as these never
get reported to the driver. [PATCH 8]
4. Add handling of NIC hw errors reported through MSIx rather than
PCIe AER channel. [PATCH 9]
5. Add handling for the HW RAS and MSIx errors in the modules MAC, PPP
PF, MSIx SRAM, RCB and SSU. [PATCH 10-13]
6. Add handling of RoCEE RAS errors. [PATCH 14]
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
7 files changed, 1066 insertions, 656 deletions
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index a1707b77c47f..294e725c7c44 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -136,6 +136,7 @@ enum hnae3_reset_type { HNAE3_CORE_RESET, HNAE3_GLOBAL_RESET, HNAE3_IMP_RESET, + HNAE3_UNKNOWN_RESET, HNAE3_NONE_RESET, }; @@ -454,7 +455,7 @@ struct hnae3_ae_ops { int (*restore_fd_rules)(struct hnae3_handle *handle); void (*enable_fd)(struct hnae3_handle *handle, bool enable); int (*dbg_run_cmd)(struct hnae3_handle *handle, char *cmd_buf); - pci_ers_result_t (*process_hw_error)(struct hnae3_ae_dev *ae_dev); + pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev); bool (*get_hw_reset_stat)(struct hnae3_handle *handle); bool (*ae_dev_resetting)(struct hnae3_handle *handle); unsigned long (*ae_dev_reset_cnt)(struct hnae3_handle *handle); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index d1b2de2ecc89..69142a381064 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1828,8 +1828,8 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_NONE; } - if (ae_dev->ops->process_hw_error) - ret = ae_dev->ops->process_hw_error(ae_dev); + if (ae_dev->ops->handle_hw_ras_error) + ret = ae_dev->ops->handle_hw_ras_error(ae_dev); else return PCI_ERS_RESULT_NONE; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index e1805b972628..b1ee6feb0caf 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -215,26 +215,29 @@ enum hclge_opcode_type { HCLGE_OPC_SFP_GET_SPEED = 0x7104, /* Error INT commands */ + HCLGE_MAC_COMMON_INT_EN = 0x030E, HCLGE_TM_SCH_ECC_INT_EN = 0x0829, - HCLGE_TM_SCH_ECC_ERR_RINT_CMD = 0x082d, - HCLGE_TM_SCH_ECC_ERR_RINT_CE = 0x082f, - HCLGE_TM_SCH_ECC_ERR_RINT_NFE = 0x0830, - HCLGE_TM_SCH_ECC_ERR_RINT_FE = 0x0831, - HCLGE_TM_SCH_MBIT_ECC_INFO_CMD = 0x0833, + HCLGE_SSU_ECC_INT_CMD = 0x0989, + HCLGE_SSU_COMMON_INT_CMD = 0x098C, + HCLGE_PPU_MPF_ECC_INT_CMD = 0x0B40, + HCLGE_PPU_MPF_OTHER_INT_CMD = 0x0B41, + HCLGE_PPU_PF_OTHER_INT_CMD = 0x0B42, HCLGE_COMMON_ECC_INT_CFG = 0x1505, - HCLGE_IGU_EGU_TNL_INT_QUERY = 0x1802, + HCLGE_QUERY_RAS_INT_STS_BD_NUM = 0x1510, + HCLGE_QUERY_CLEAR_MPF_RAS_INT = 0x1511, + HCLGE_QUERY_CLEAR_PF_RAS_INT = 0x1512, + HCLGE_QUERY_MSIX_INT_STS_BD_NUM = 0x1513, + HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT = 0x1514, + HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT = 0x1515, + HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580, + HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581, + HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584, HCLGE_IGU_EGU_TNL_INT_EN = 0x1803, - HCLGE_IGU_EGU_TNL_INT_CLR = 0x1804, - HCLGE_IGU_COMMON_INT_QUERY = 0x1805, HCLGE_IGU_COMMON_INT_EN = 0x1806, - HCLGE_IGU_COMMON_INT_CLR = 0x1807, HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14, - HCLGE_TM_QCN_MEM_INT_INFO_CMD = 0x1A17, HCLGE_PPP_CMD0_INT_CMD = 0x2100, HCLGE_PPP_CMD1_INT_CMD = 0x2101, - HCLGE_NCSI_INT_QUERY = 0x2400, HCLGE_NCSI_INT_EN = 0x2401, - HCLGE_NCSI_INT_CLR = 0x2402, }; #define HCLGE_TQP_REG_OFFSET 0x80000 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 6da9e22d82d0..77deea0beeba 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -4,78 +4,39 @@ #include "hclge_err.h" static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { - { .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" }, { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" }, - { .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" }, { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" }, - { .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" }, { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" }, - { .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" }, { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" }, - { .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" }, { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" }, - { .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" }, { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" }, - { .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" }, { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" }, - { .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" }, { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" }, - { /* sentinel */ } -}; - -static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = { - { .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" }, - { .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" }, + { .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err" }, { /* sentinel */ } }; static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { - { .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" }, { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" }, - { .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" }, { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" }, - { .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" }, { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" }, - { .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" }, { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" }, - { .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" }, { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" }, - { .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" }, { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" }, - { .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" }, { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" }, - { .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" }, { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" }, - { /* sentinel */ } -}; - -static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = { - { .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" }, - { .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" }, - { .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" }, - { .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" }, - { .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" }, - { .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" }, - { .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" }, - { .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" }, - { .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" }, - { .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" }, - { .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" }, - { .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" }, - { .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" }, - { .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" }, - { .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" }, - { .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" }, + { .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" }, + { .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" }, + { .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" }, + { .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" }, + { .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" }, + { .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" }, + { .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" }, + { .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" }, { /* sentinel */ } }; static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { - { .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" }, - { .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" }, - { .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" }, - { .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" }, - { .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" }, - { .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" }, { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" }, { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" }, { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" }, @@ -85,15 +46,19 @@ static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { { /* sentinel */ } }; -static const struct hclge_hw_error hclge_igu_com_err_int[] = { +static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { + { .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err" }, + { .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_igu_int[] = { { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err" }, - { .int_msk = BIT(1), .msg = "igu_rx_buf0_ecc_1bit_err" }, { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err" }, - { .int_msk = BIT(3), .msg = "igu_rx_buf1_ecc_1bit_err" }, { /* sentinel */ } }; -static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = { +static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { { .int_msk = BIT(0), .msg = "rx_buf_overflow" }, { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow" }, { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow" }, @@ -104,51 +69,11 @@ static const struct hclge_hw_error hclge_igu_egu_tnl_err_int[] = { }; static const struct hclge_hw_error hclge_ncsi_err_int[] = { - { .int_msk = BIT(0), .msg = "ncsi_tx_ecc_1bit_err" }, { .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err" }, { /* sentinel */ } }; -static const struct hclge_hw_error hclge_ppp_mpf_int0[] = { - { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_1bit_err" }, - { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_1bit_err" }, - { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_1bit_err" }, - { .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_1bit_err" }, - { .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_1bit_err" }, - { .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_1bit_err" }, - { .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_1bit_err" }, - { .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_1bit_err" }, - { .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_1bit_err" }, - { .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_1bit_err" }, - { .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_1bit_err" }, - { .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_1bit_err" }, - { .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_1bit_err" }, - { .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_1bit_err" }, - { .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_1bit_err" }, - { .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_1bit_err" }, - { .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_1bit_err" }, - { .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_1bit_err" }, - { .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_1bit_err" }, - { .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_1bit_err" }, - { .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_1bit_err" }, - { .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_1bit_err" }, - { .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_1bit_err" }, - { .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_1bit_err" }, - { .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_1bit_err" }, - { .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_1bit_err" }, - { .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_1bit_err" }, - { .int_msk = BIT(27), - .msg = "flow_director_ad_mem0_ecc_1bit_err" }, - { .int_msk = BIT(28), - .msg = "flow_director_ad_mem1_ecc_1bit_err" }, - { .int_msk = BIT(29), - .msg = "rx_vlan_tag_memory_ecc_1bit_err" }, - { .int_msk = BIT(30), - .msg = "Tx_UP_mapping_config_mem_ecc_1bit_err" }, - { /* sentinel */ } -}; - -static const struct hclge_hw_error hclge_ppp_mpf_int1[] = { +static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = { { .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err" }, { .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err" }, { .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err" }, @@ -187,23 +112,13 @@ static const struct hclge_hw_error hclge_ppp_mpf_int1[] = { { /* sentinel */ } }; -static const struct hclge_hw_error hclge_ppp_pf_int[] = { - { .int_msk = BIT(0), .msg = "Tx_vlan_tag_err" }, +static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = { + { .int_msk = BIT(0), .msg = "tx_vlan_tag_err" }, { .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err" }, { /* sentinel */ } }; -static const struct hclge_hw_error hclge_ppp_mpf_int2[] = { - { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_1bit_err" }, - { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_1bit_err" }, - { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_1bit_err" }, - { .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_1bit_err" }, - { .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_1bit_err" }, - { .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_1bit_err" }, - { /* sentinel */ } -}; - -static const struct hclge_hw_error hclge_ppp_mpf_int3[] = { +static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = { { .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err" }, { .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err" }, { .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err" }, @@ -213,145 +128,248 @@ static const struct hclge_hw_error hclge_ppp_mpf_int3[] = { { /* sentinel */ } }; -struct hclge_tm_sch_ecc_info { - const char *name; -}; - -static const struct hclge_tm_sch_ecc_info hclge_tm_sch_ecc_err[7][15] = { - { - { .name = "QSET_QUEUE_CTRL:PRI_LEN TAB" }, - { .name = "QSET_QUEUE_CTRL:SPA_LEN TAB" }, - { .name = "QSET_QUEUE_CTRL:SPB_LEN TAB" }, - { .name = "QSET_QUEUE_CTRL:WRRA_LEN TAB" }, - { .name = "QSET_QUEUE_CTRL:WRRB_LEN TAB" }, - { .name = "QSET_QUEUE_CTRL:SPA_HPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:SPB_HPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:WRRA_HPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:WRRB_HPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:QS_LINKLIST TAB" }, - { .name = "QSET_QUEUE_CTRL:SPA_TPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:SPB_TPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:WRRA_TPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:WRRB_TPTR TAB" }, - { .name = "QSET_QUEUE_CTRL:QS_DEFICITCNT TAB" }, - }, - { - { .name = "ROCE_QUEUE_CTRL:QS_LEN TAB" }, - { .name = "ROCE_QUEUE_CTRL:QS_TPTR TAB" }, - { .name = "ROCE_QUEUE_CTRL:QS_HPTR TAB" }, - { .name = "ROCE_QUEUE_CTRL:QLINKLIST TAB" }, - { .name = "ROCE_QUEUE_CTRL:QCLEN TAB" }, - }, - { - { .name = "NIC_QUEUE_CTRL:QS_LEN TAB" }, - { .name = "NIC_QUEUE_CTRL:QS_TPTR TAB" }, - { .name = "NIC_QUEUE_CTRL:QS_HPTR TAB" }, - { .name = "NIC_QUEUE_CTRL:QLINKLIST TAB" }, - { .name = "NIC_QUEUE_CTRL:QCLEN TAB" }, - }, - { - { .name = "RAM_CFG_CTRL:CSHAP TAB" }, - { .name = "RAM_CFG_CTRL:PSHAP TAB" }, - }, - { - { .name = "SHAPER_CTRL:PSHAP TAB" }, - }, - { - { .name = "MSCH_CTRL" }, - }, - { - { .name = "TOP_CTRL" }, - }, -}; - -static const struct hclge_hw_error hclge_tm_sch_err_int[] = { - { .int_msk = BIT(0), .msg = "tm_sch_ecc_1bit_err" }, +static const struct hclge_hw_error hclge_tm_sch_rint[] = { { .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err" }, - { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_full_err" }, - { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_empty_err" }, - { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_full_err" }, - { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_empty_err" }, - { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_full_err" }, - { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_empty_err" }, - { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_full_err" }, - { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_empty_err" }, - { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_full_err" }, - { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_empty_err" }, + { .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err" }, + { .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err" }, + { .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err" }, + { .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err" }, + { .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err" }, + { .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err" }, + { .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err" }, + { .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err" }, + { .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err" }, + { .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err" }, { .int_msk = BIT(12), - .msg = "tm_sch_port_shap_offset_fifo_wr_full_err" }, + .msg = "tm_sch_port_shap_offset_fifo_wr_err" }, { .int_msk = BIT(13), - .msg = "tm_sch_port_shap_offset_fifo_rd_empty_err" }, + .msg = "tm_sch_port_shap_offset_fifo_rd_err" }, { .int_msk = BIT(14), - .msg = "tm_sch_pg_pshap_offset_fifo_wr_full_err" }, + .msg = "tm_sch_pg_pshap_offset_fifo_wr_err" }, { .int_msk = BIT(15), - .msg = "tm_sch_pg_pshap_offset_fifo_rd_empty_err" }, + .msg = "tm_sch_pg_pshap_offset_fifo_rd_err" }, { .int_msk = BIT(16), - .msg = "tm_sch_pg_cshap_offset_fifo_wr_full_err" }, + .msg = "tm_sch_pg_cshap_offset_fifo_wr_err" }, { .int_msk = BIT(17), - .msg = "tm_sch_pg_cshap_offset_fifo_rd_empty_err" }, + .msg = "tm_sch_pg_cshap_offset_fifo_rd_err" }, { .int_msk = BIT(18), - .msg = "tm_sch_pri_pshap_offset_fifo_wr_full_err" }, + .msg = "tm_sch_pri_pshap_offset_fifo_wr_err" }, { .int_msk = BIT(19), - .msg = "tm_sch_pri_pshap_offset_fifo_rd_empty_err" }, + .msg = "tm_sch_pri_pshap_offset_fifo_rd_err" }, { .int_msk = BIT(20), - .msg = "tm_sch_pri_cshap_offset_fifo_wr_full_err" }, + .msg = "tm_sch_pri_cshap_offset_fifo_wr_err" }, { .int_msk = BIT(21), - .msg = "tm_sch_pri_cshap_offset_fifo_rd_empty_err" }, - { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_full_err" }, - { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_empty_err" }, - { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_full_err" }, - { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_empty_err" }, - { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_full_err" }, - { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_empty_err" }, - { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_full_err" }, - { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_empty_err" }, - { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_full_err" }, - { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_empty_err" }, + .msg = "tm_sch_pri_cshap_offset_fifo_rd_err" }, + { .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err" }, + { .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err" }, + { .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err" }, + { .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err" }, + { .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err" }, + { .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err" }, + { .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err" }, + { .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err" }, + { .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err" }, + { .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_qcn_fifo_rint[] = { + { .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err" }, + { .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err" }, + { .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err" }, + { .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err" }, + { .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err" }, + { .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err" }, + { .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err" }, + { .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err" }, + { .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err" }, + { .int_msk = BIT(9), .msg = "qcn_shap_gp0_offser_fifo_wr_err" }, + { .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err" }, + { .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err" }, + { .int_msk = BIT(12), .msg = "qcn_shap_gp2_offset_fifo_rd_err" }, + { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err" }, + { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err" }, + { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err" }, + { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err" }, + { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err" }, { /* sentinel */ } }; -static const struct hclge_hw_error hclge_qcn_ecc_err_int[] = { - { .int_msk = BIT(0), .msg = "qcn_byte_mem_ecc_1bit_err" }, +static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err" }, - { .int_msk = BIT(2), .msg = "qcn_time_mem_ecc_1bit_err" }, { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err" }, - { .int_msk = BIT(4), .msg = "qcn_fb_mem_ecc_1bit_err" }, { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err" }, - { .int_msk = BIT(6), .msg = "qcn_link_mem_ecc_1bit_err" }, { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err" }, - { .int_msk = BIT(8), .msg = "qcn_rate_mem_ecc_1bit_err" }, { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err" }, - { .int_msk = BIT(10), .msg = "qcn_tmplt_mem_ecc_1bit_err" }, { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err" }, - { .int_msk = BIT(12), .msg = "qcn_shap_cfg_mem_ecc_1bit_err" }, { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err" }, - { .int_msk = BIT(14), .msg = "qcn_gp0_barrel_mem_ecc_1bit_err" }, { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err" }, - { .int_msk = BIT(16), .msg = "qcn_gp1_barrel_mem_ecc_1bit_err" }, { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err" }, - { .int_msk = BIT(18), .msg = "qcn_gp2_barrel_mem_ecc_1bit_err" }, { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err" }, - { .int_msk = BIT(20), .msg = "qcn_gp3_barral_mem_ecc_1bit_err" }, { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err" }, { /* sentinel */ } }; -static void hclge_log_error(struct device *dev, - const struct hclge_hw_error *err_list, +static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { + { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err" }, + { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err" }, + { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err" }, + { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err" }, + { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err" }, + { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err" }, + { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err" }, + { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { + { .int_msk = BIT(13), .msg = "rpu_rx_pkt_bit32_ecc_mbit_err" }, + { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err" }, + { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err" }, + { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err" }, + { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err" }, + { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err" }, + { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err" }, + { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err" }, + { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err" }, + { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err" }, + { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err" }, + { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err" }, + { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err" }, + { .int_msk = BIT(26), .msg = "rd_bus_err" }, + { .int_msk = BIT(27), .msg = "wr_bus_err" }, + { .int_msk = BIT(28), .msg = "reg_search_miss" }, + { .int_msk = BIT(29), .msg = "rx_q_search_miss" }, + { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect" }, + { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { + { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err" }, + { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err" }, + { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err" }, + { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { + { .int_msk = BIT(0), .msg = "over_8bd_no_fe" }, + { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err" }, + { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err" }, + { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison" }, + { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison" }, + { .int_msk = BIT(5), .msg = "buf_wait_timeout" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ssu_com_err_int[] = { + { .int_msk = BIT(0), .msg = "buf_sum_err" }, + { .int_msk = BIT(1), .msg = "ppp_mb_num_err" }, + { .int_msk = BIT(2), .msg = "ppp_mbid_err" }, + { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err" }, + { .int_msk = BIT(4), .msg = "ppp_rlt_host_err" }, + { .int_msk = BIT(5), .msg = "cks_edit_position_err" }, + { .int_msk = BIT(6), .msg = "cks_edit_condition_err" }, + { .int_msk = BIT(7), .msg = "vlan_edit_condition_err" }, + { .int_msk = BIT(8), .msg = "vlan_num_ot_err" }, + { .int_msk = BIT(9), .msg = "vlan_num_in_err" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { + { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" }, + { .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port" }, + { .int_msk = BIT(2), .msg = "igu_pkt_without_key_port" }, + { .int_msk = BIT(3), .msg = "roc_eof_mis_match_port" }, + { .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port" }, + { .int_msk = BIT(5), .msg = "igu_eof_mis_match_port" }, + { .int_msk = BIT(6), .msg = "roc_sof_mis_match_port" }, + { .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port" }, + { .int_msk = BIT(8), .msg = "igu_sof_mis_match_port" }, + { .int_msk = BIT(11), .msg = "ets_rd_int_rx_port" }, + { .int_msk = BIT(12), .msg = "ets_wr_int_rx_port" }, + { .int_msk = BIT(13), .msg = "ets_rd_int_tx_port" }, + { .int_msk = BIT(14), .msg = "ets_wr_int_tx_port" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = { + { .int_msk = BIT(0), .msg = "ig_mac_inf_int" }, + { .int_msk = BIT(1), .msg = "ig_host_inf_int" }, + { .int_msk = BIT(2), .msg = "ig_roc_buf_int" }, + { .int_msk = BIT(3), .msg = "ig_host_data_fifo_int" }, + { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int" }, + { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int" }, + { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int" }, + { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int" }, + { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int" }, + { .int_msk = BIT(9), .msg = "qm_eof_fifo_int" }, + { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int" }, + { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int" }, + { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int" }, + { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int" }, + { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int" }, + { .int_msk = BIT(15), .msg = "host_cmd_fifo_int" }, + { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int" }, + { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int" }, + { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int" }, + { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int" }, + { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int" }, + { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int" }, + { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int" }, + { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { + { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg" }, + { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg" }, + { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg" }, + { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { + { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port" }, + { .int_msk = BIT(9), .msg = "low_water_line_err_port" }, + { .int_msk = BIT(10), .msg = "hi_water_line_err_port" }, + { /* sentinel */ } +}; + +static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { + { .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" }, + { .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" }, + { .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" }, + { .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" }, + { .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" }, + { .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" }, + { .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" }, + { .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" }, + { .int_msk = 0x14, .msg = "rocee qmm ovf: srqc invalid err" }, + { .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" }, + { .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" }, + { .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" }, + { .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" }, + { .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" }, + { .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" }, + { .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" }, + { .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" }, + { .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" }, + { .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" }, + { .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" }, + { /* sentinel */ } +}; + +static void hclge_log_error(struct device *dev, char *reg, + const struct hclge_hw_error *err, u32 err_sts) { - const struct hclge_hw_error *err; - int i = 0; - - while (err_list[i].msg) { - err = &err_list[i]; - if (!(err->int_msk & err_sts)) { - i++; - continue; - } - dev_warn(dev, "%s [error status=0x%x] found\n", - err->msg, err_sts); - i++; + while (err->msg) { + if (err->int_msk & err_sts) + dev_warn(dev, "%s %s found [error status=0x%x]\n", + reg, err->msg, err_sts); + err++; } } @@ -391,96 +409,44 @@ static int hclge_cmd_query_error(struct hclge_dev *hdev, return ret; } -/* hclge_cmd_clear_error: clear the error status - * @hdev: pointer to struct hclge_dev - * @desc: descriptor for describing the command - * @desc_src: prefilled descriptor from the previous command for reusing - * @cmd: command opcode - * @flag: flag for extended command structure - * - * This function clear the error status in the hw register/s using command - */ -static int hclge_cmd_clear_error(struct hclge_dev *hdev, - struct hclge_desc *desc, - struct hclge_desc *desc_src, - u32 cmd, u16 flag) -{ - struct device *dev = &hdev->pdev->dev; - int num = 1; - int ret, i; - - if (cmd) { - hclge_cmd_setup_basic_desc(&desc[0], cmd, false); - if (flag) { - desc[0].flag |= cpu_to_le16(flag); - hclge_cmd_setup_basic_desc(&desc[1], cmd, false); - num = 2; - } - if (desc_src) { - for (i = 0; i < 6; i++) { - desc[0].data[i] = desc_src[0].data[i]; - if (flag) - desc[1].data[i] = desc_src[1].data[i]; - } - } - } else { - hclge_cmd_reuse_desc(&desc[0], false); - if (flag) { - desc[0].flag |= cpu_to_le16(flag); - hclge_cmd_reuse_desc(&desc[1], false); - num = 2; - } - } - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); - if (ret) - dev_err(dev, "clear error cmd failed (%d)\n", ret); - - return ret; -} - -static int hclge_enable_common_error(struct hclge_dev *hdev, bool en) +static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc[2]; int ret; + /* configure common error interrupts */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); if (en) { - /* enable COMMON error interrupts */ desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); - desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN); + desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | + HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); - } else { - /* disable COMMON error interrupts */ - desc[0].data[0] = 0; - desc[0].data[2] = 0; - desc[0].data[3] = 0; - desc[0].data[4] = 0; - desc[0].data[5] = 0; } + desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); - desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK); + desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | + HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK); desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); if (ret) dev_err(dev, - "failed(%d) to enable/disable COMMON err interrupts\n", - ret); + "fail(%d) to configure common err interrupts\n", ret); return ret; } -static int hclge_enable_ncsi_error(struct hclge_dev *hdev, bool en) +static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; @@ -489,74 +455,65 @@ static int hclge_enable_ncsi_error(struct hclge_dev *hdev, bool en) if (hdev->pdev->revision < 0x21) return 0; - /* enable/disable NCSI error interrupts */ + /* configure NCSI error interrupts */ hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); if (en) desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN); - else - desc.data[0] = 0; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) dev_err(dev, - "failed(%d) to enable/disable NCSI error interrupts\n", - ret); + "fail(%d) to configure NCSI error interrupts\n", ret); return ret; } -static int hclge_enable_igu_egu_error(struct hclge_dev *hdev, bool en) +static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; int ret; - /* enable/disable error interrupts */ + /* configure IGU,EGU error interrupts */ hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false); if (en) desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN); - else - desc.data[0] = 0; + desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(dev, - "failed(%d) to enable/disable IGU common interrupts\n", - ret); + "fail(%d) to configure IGU common interrupts\n", ret); return ret; } hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false); if (en) desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN); - else - desc.data[0] = 0; + desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(dev, - "failed(%d) to enable/disable IGU-EGU TNL interrupts\n", - ret); + "fail(%d) to configure IGU-EGU TNL interrupts\n", ret); return ret; } - ret = hclge_enable_ncsi_error(hdev, en); - if (ret) - dev_err(dev, "fail(%d) to en/disable err int\n", ret); + ret = hclge_config_ncsi_hw_err_int(hdev, en); return ret; } -static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, +static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc[2]; int ret; - /* enable/disable PPP error interrupts */ + /* configure PPP error interrupts */ hclge_cmd_setup_basic_desc(&desc[0], cmd, false); desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); hclge_cmd_setup_basic_desc(&desc[1], cmd, false); @@ -567,24 +524,24 @@ static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN); desc[0].data[1] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN); - } else { - desc[0].data[0] = 0; - desc[0].data[1] = 0; + desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN); } + desc[1].data[0] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); desc[1].data[1] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); + if (hdev->pdev->revision >= 0x21) + desc[1].data[2] = + cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK); } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { if (en) { desc[0].data[0] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN); desc[0].data[1] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN); - } else { - desc[0].data[0] = 0; - desc[0].data[1] = 0; } + desc[1].data[0] = cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK); desc[1].data[1] = @@ -593,491 +550,863 @@ static int hclge_enable_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); if (ret) - dev_err(dev, - "failed(%d) to enable/disable PPP error interrupts\n", - ret); + dev_err(dev, "fail(%d) to configure PPP error intr\n", ret); return ret; } -static int hclge_enable_ppp_error(struct hclge_dev *hdev, bool en) +static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en) { - struct device *dev = &hdev->pdev->dev; int ret; - ret = hclge_enable_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, + ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, en); - if (ret) { - dev_err(dev, - "failed(%d) to enable/disable PPP error intr 0,1\n", - ret); + if (ret) return ret; - } - ret = hclge_enable_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, + ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, en); - if (ret) - dev_err(dev, - "failed(%d) to enable/disable PPP error intr 2,3\n", - ret); return ret; } -int hclge_enable_tm_hw_error(struct hclge_dev *hdev, bool en) +static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; int ret; - /* enable TM SCH hw errors */ + /* configure TM SCH hw errors */ hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false); if (en) desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN); - else - desc.data[0] = 0; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { - dev_err(dev, "failed(%d) to configure TM SCH errors\n", ret); + dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret); return ret; } - /* enable TM QCN hw errors */ + /* configure TM QCN hw errors */ ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0, 0, 0); if (ret) { - dev_err(dev, "failed(%d) to read TM QCN CFG status\n", ret); + dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret); return ret; } hclge_cmd_reuse_desc(&desc, false); if (en) desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); - else - desc.data[1] = 0; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) dev_err(dev, - "failed(%d) to configure TM QCN mem errors\n", ret); + "fail(%d) to configure TM QCN mem errors\n", ret); return ret; } -static void hclge_process_common_error(struct hclge_dev *hdev, - enum hclge_err_int_type type) +static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; - struct hclge_desc desc[2]; - u32 err_sts; + struct hclge_desc desc; int ret; - /* read err sts */ - ret = hclge_cmd_query_error(hdev, &desc[0], - HCLGE_COMMON_ECC_INT_CFG, - HCLGE_CMD_FLAG_NEXT, 0, 0); - if (ret) { + /* configure MAC common error interrupts */ + hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false); + if (en) + desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN); + + desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) dev_err(dev, - "failed(=%d) to query COMMON error interrupt status\n", - ret); - return; - } + "fail(%d) to configure MAC COMMON error intr\n", ret); - /* log err */ - err_sts = (le32_to_cpu(desc[0].data[0])) & HCLGE_IMP_TCM_ECC_INT_MASK; - hclge_log_error(dev, &hclge_imp_tcm_ecc_int[0], err_sts); + return ret; +} - err_sts = (le32_to_cpu(desc[0].data[1])) & HCLGE_CMDQ_ECC_INT_MASK; - hclge_log_error(dev, &hclge_cmdq_nic_mem_ecc_int[0], err_sts); +static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, + bool en) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int num = 1; + int ret; - err_sts = (le32_to_cpu(desc[0].data[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT) - & HCLGE_CMDQ_ECC_INT_MASK; - hclge_log_error(dev, &hclge_cmdq_rocee_mem_ecc_int[0], err_sts); + /* configure PPU error interrupts */ + if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) { + hclge_cmd_setup_basic_desc(&desc[0], cmd, false); + desc[0].flag |= HCLGE_CMD_FLAG_NEXT; + hclge_cmd_setup_basic_desc(&desc[1], cmd, false); + if (en) { + desc[0].data[0] = HCLGE_PPU_MPF_ABNORMAL_INT0_EN; + desc[0].data[1] = HCLGE_PPU_MPF_ABNORMAL_INT1_EN; + desc[1].data[3] = HCLGE_PPU_MPF_ABNORMAL_INT3_EN; + desc[1].data[4] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN; + } - if ((le32_to_cpu(desc[0].data[3])) & BIT(0)) - dev_warn(dev, "imp_rd_data_poison_err found\n"); + desc[1].data[0] = HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK; + desc[1].data[1] = HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK; + desc[1].data[2] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK; + desc[1].data[3] |= HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK; + num = 2; + } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { + hclge_cmd_setup_basic_desc(&desc[0], cmd, false); + if (en) + desc[0].data[0] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN2; - err_sts = (le32_to_cpu(desc[0].data[3]) >> HCLGE_TQP_ECC_INT_SHIFT) & - HCLGE_TQP_ECC_INT_MASK; - hclge_log_error(dev, &hclge_tqp_int_ecc_int[0], err_sts); + desc[0].data[2] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK; + } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) { + hclge_cmd_setup_basic_desc(&desc[0], cmd, false); + if (en) + desc[0].data[0] = HCLGE_PPU_PF_ABNORMAL_INT_EN; - err_sts = (le32_to_cpu(desc[0].data[5])) & - HCLGE_IMP_ITCM4_ECC_INT_MASK; - hclge_log_error(dev, &hclge_imp_itcm4_ecc_int[0], err_sts); + desc[0].data[2] = HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK; + } else { + dev_err(dev, "Invalid cmd to configure PPU error interrupts\n"); + return -EINVAL; + } - /* clear error interrupts */ - desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK); - desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK | - HCLGE_CMDQ_ROCEE_ECC_CLR_MASK); - desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK); - desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK); + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); - ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0, - HCLGE_CMD_FLAG_NEXT); - if (ret) - dev_err(dev, - "failed(%d) to clear COMMON error interrupt status\n", - ret); + return ret; } -static void hclge_process_ncsi_error(struct hclge_dev *hdev, - enum hclge_err_int_type type) +static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; - struct hclge_desc desc_rd; - struct hclge_desc desc_wr; - u32 err_sts; int ret; - if (hdev->pdev->revision < 0x21) - return; - - /* read NCSI error status */ - ret = hclge_cmd_query_error(hdev, &desc_rd, HCLGE_NCSI_INT_QUERY, - 0, 1, HCLGE_NCSI_ERR_INT_TYPE); + ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD, + en); if (ret) { - dev_err(dev, - "failed(=%d) to query NCSI error interrupt status\n", + dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n", ret); - return; + return ret; } - /* log err */ - err_sts = le32_to_cpu(desc_rd.data[0]); - hclge_log_error(dev, &hclge_ncsi_err_int[0], err_sts); + ret = hclge_config_ppu_error_interrupts(hdev, + HCLGE_PPU_MPF_OTHER_INT_CMD, + en); + if (ret) { + dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret); + return ret; + } - /* clear err int */ - ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, - HCLGE_NCSI_INT_CLR, 0); + ret = hclge_config_ppu_error_interrupts(hdev, + HCLGE_PPU_PF_OTHER_INT_CMD, en); if (ret) - dev_err(dev, "failed(=%d) to clear NCSI interrupt status\n", + dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n", ret); + return ret; } -static void hclge_process_igu_egu_error(struct hclge_dev *hdev, - enum hclge_err_int_type int_type) +static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; - struct hclge_desc desc_rd; - struct hclge_desc desc_wr; - u32 err_sts; + struct hclge_desc desc[2]; int ret; - /* read IGU common err sts */ - ret = hclge_cmd_query_error(hdev, &desc_rd, - HCLGE_IGU_COMMON_INT_QUERY, - 0, 1, int_type); - if (ret) { - dev_err(dev, "failed(=%d) to query IGU common int status\n", - ret); - return; + /* configure SSU ecc error interrupts */ + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false); + if (en) { + desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN); + desc[0].data[1] = + cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN); + desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN); } - /* log err */ - err_sts = le32_to_cpu(desc_rd.data[0]) & - HCLGE_IGU_COM_INT_MASK; - hclge_log_error(dev, &hclge_igu_com_err_int[0], err_sts); + desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK); + desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK); + desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK); - /* clear err int */ - ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, - HCLGE_IGU_COMMON_INT_CLR, 0); + ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); if (ret) { - dev_err(dev, "failed(=%d) to clear IGU common int status\n", - ret); - return; + dev_err(dev, + "fail(%d) to configure SSU ECC error interrupt\n", ret); + return ret; } - /* read IGU-EGU TNL err sts */ - ret = hclge_cmd_query_error(hdev, &desc_rd, - HCLGE_IGU_EGU_TNL_INT_QUERY, - 0, 1, int_type); - if (ret) { - dev_err(dev, "failed(=%d) to query IGU-EGU TNL int status\n", - ret); - return; + /* configure SSU common error interrupts */ + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false); + + if (en) { + if (hdev->pdev->revision >= 0x21) + desc[0].data[0] = + cpu_to_le32(HCLGE_SSU_COMMON_INT_EN); + else + desc[0].data[0] = + cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5)); + desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN); + desc[0].data[2] = + cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN); } - /* log err */ - err_sts = le32_to_cpu(desc_rd.data[0]) & - HCLGE_IGU_EGU_TNL_INT_MASK; - hclge_log_error(dev, &hclge_igu_egu_tnl_err_int[0], err_sts); + desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK | + HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK); + desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK); - /* clear err int */ - ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd, - HCLGE_IGU_EGU_TNL_INT_CLR, 0); - if (ret) { - dev_err(dev, "failed(=%d) to clear IGU-EGU TNL int status\n", - ret); - return; - } + ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); + if (ret) + dev_err(dev, + "fail(%d) to configure SSU COMMON error intr\n", ret); - hclge_process_ncsi_error(hdev, HCLGE_ERR_INT_RAS_NFE); + return ret; } -static int hclge_log_and_clear_ppp_error(struct hclge_dev *hdev, u32 cmd, - enum hclge_err_int_type int_type) +#define HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type) \ + do { \ + if (ae_dev->ops->set_default_reset_request) \ + ae_dev->ops->set_default_reset_request(ae_dev, \ + reset_type); \ + } while (0) + +/* hclge_handle_mpf_ras_error: handle all main PF RAS errors + * @hdev: pointer to struct hclge_dev + * @desc: descriptor for describing the command + * @num: number of extended command structures + * + * This function handles all the main PF RAS errors in the + * hw register/s using command. + */ +static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, + struct hclge_desc *desc, + int num) { + struct hnae3_ae_dev *ae_dev = hdev->ae_dev; struct device *dev = &hdev->pdev->dev; - const struct hclge_hw_error *hw_err_lst1, *hw_err_lst2, *hw_err_lst3; - struct hclge_desc desc[2]; - u32 err_sts; + __le32 *desc_data; + u32 status; int ret; - /* read PPP INT sts */ - ret = hclge_cmd_query_error(hdev, &desc[0], cmd, - HCLGE_CMD_FLAG_NEXT, 5, int_type); + /* query all main PF RAS errors */ + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) { - dev_err(dev, "failed(=%d) to query PPP interrupt status\n", - ret); - return -EIO; + dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret); + return ret; } - /* log error */ - if (cmd == HCLGE_PPP_CMD0_INT_CMD) { - hw_err_lst1 = &hclge_ppp_mpf_int0[0]; - hw_err_lst2 = &hclge_ppp_mpf_int1[0]; - hw_err_lst3 = &hclge_ppp_pf_int[0]; - } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { - hw_err_lst1 = &hclge_ppp_mpf_int2[0]; - hw_err_lst2 = &hclge_ppp_mpf_int3[0]; - } else { - dev_err(dev, "invalid command(=%d)\n", cmd); - return -EINVAL; + /* log HNS common errors */ + status = le32_to_cpu(desc[0].data[0]); + if (status) { + hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", + &hclge_imp_tcm_ecc_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); } - err_sts = le32_to_cpu(desc[0].data[2]); - if (err_sts) - hclge_log_error(dev, hw_err_lst1, err_sts); + status = le32_to_cpu(desc[0].data[1]); + if (status) { + hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", + &hclge_cmdq_nic_mem_ecc_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + } - err_sts = le32_to_cpu(desc[0].data[3]); - if (err_sts) - hclge_log_error(dev, hw_err_lst2, err_sts); + if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) { + dev_warn(dev, "imp_rd_data_poison_err found\n"); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + } - if (cmd == HCLGE_PPP_CMD0_INT_CMD) { - err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3; - if (err_sts) - hclge_log_error(dev, hw_err_lst3, err_sts); + status = le32_to_cpu(desc[0].data[3]); + if (status) { + hclge_log_error(dev, "TQP_INT_ECC_INT_STS", + &hclge_tqp_int_ecc_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); } - /* clear PPP INT */ - ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0, - HCLGE_CMD_FLAG_NEXT); - if (ret) { - dev_err(dev, "failed(=%d) to clear PPP interrupt status\n", - ret); - return -EIO; + status = le32_to_cpu(desc[0].data[4]); + if (status) { + hclge_log_error(dev, "MSIX_ECC_INT_STS", + &hclge_msix_sram_ecc_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); } - return 0; + /* log SSU(Storage Switch Unit) errors */ + desc_data = (__le32 *)&desc[2]; + status = le32_to_cpu(*(desc_data + 2)); + if (status) { + dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_0 ssu_ecc_mbit_int[31:0]\n"); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + status = le32_to_cpu(*(desc_data + 3)) & BIT(0); + if (status) { + dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_ecc_mbit_int[32]\n"); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; + if (status) { + hclge_log_error(dev, "SSU_COMMON_ERR_INT", + &hclge_ssu_com_err_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + } + + /* log IGU(Ingress Unit) errors */ + desc_data = (__le32 *)&desc[3]; + status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; + if (status) + hclge_log_error(dev, "IGU_INT_STS", + &hclge_igu_int[0], status); + + /* log PPP(Programmable Packet Process) errors */ + desc_data = (__le32 *)&desc[4]; + status = le32_to_cpu(*(desc_data + 1)); + if (status) + hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", + &hclge_ppp_mpf_abnormal_int_st1[0], status); + + status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; + if (status) + hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", + &hclge_ppp_mpf_abnormal_int_st3[0], status); + + /* log PPU(RCB) errors */ + desc_data = (__le32 *)&desc[5]; + status = le32_to_cpu(*(desc_data + 1)); + if (status) { + dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST1 %s found\n", + "rpu_rx_pkt_ecc_mbit_err"); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + status = le32_to_cpu(*(desc_data + 2)); + if (status) { + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", + &hclge_ppu_mpf_abnormal_int_st2[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; + if (status) { + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", + &hclge_ppu_mpf_abnormal_int_st3[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + /* log TM(Traffic Manager) errors */ + desc_data = (__le32 *)&desc[6]; + status = le32_to_cpu(*desc_data); + if (status) { + hclge_log_error(dev, "TM_SCH_RINT", + &hclge_tm_sch_rint[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + /* log QCN(Quantized Congestion Control) errors */ + desc_data = (__le32 *)&desc[7]; + status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; + if (status) { + hclge_log_error(dev, "QCN_FIFO_RINT", + &hclge_qcn_fifo_rint[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; + if (status) { + hclge_log_error(dev, "QCN_ECC_RINT", + &hclge_qcn_ecc_rint[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + /* log NCSI errors */ + desc_data = (__le32 *)&desc[9]; + status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; + if (status) { + hclge_log_error(dev, "NCSI_ECC_INT_RPT", + &hclge_ncsi_err_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_CORE_RESET); + } + + /* clear all main PF RAS errors */ + hclge_cmd_reuse_desc(&desc[0], false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + if (ret) + dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret); + + return ret; } -static void hclge_process_ppp_error(struct hclge_dev *hdev, - enum hclge_err_int_type int_type) +/* hclge_handle_pf_ras_error: handle all PF RAS errors + * @hdev: pointer to struct hclge_dev + * @desc: descriptor for describing the command + * @num: number of extended command structures + * + * This function handles all the PF RAS errors in the + * hw register/s using command. + */ +static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, + struct hclge_desc *desc, + int num) { + struct hnae3_ae_dev *ae_dev = hdev->ae_dev; struct device *dev = &hdev->pdev->dev; + __le32 *desc_data; + u32 status; int ret; - /* read PPP INT0,1 sts */ - ret = hclge_log_and_clear_ppp_error(hdev, HCLGE_PPP_CMD0_INT_CMD, - int_type); - if (ret < 0) { - dev_err(dev, "failed(=%d) to clear PPP interrupt 0,1 status\n", - ret); - return; + /* query all PF RAS errors */ + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + if (ret) { + dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); + return ret; } - /* read err PPP INT2,3 sts */ - ret = hclge_log_and_clear_ppp_error(hdev, HCLGE_PPP_CMD1_INT_CMD, - int_type); - if (ret < 0) - dev_err(dev, "failed(=%d) to clear PPP interrupt 2,3 status\n", - ret); + /* log SSU(Storage Switch Unit) errors */ + status = le32_to_cpu(desc[0].data[0]); + if (status) { + hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", + &hclge_ssu_port_based_err_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + } + + status = le32_to_cpu(desc[0].data[1]); + if (status) { + hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", + &hclge_ssu_fifo_overflow_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + } + + status = le32_to_cpu(desc[0].data[2]); + if (status) { + hclge_log_error(dev, "SSU_ETS_TCG_INT", + &hclge_ssu_ets_tcg_int[0], status); + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + } + + /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ + desc_data = (__le32 *)&desc[1]; + status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; + if (status) + hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", + &hclge_igu_egu_tnl_int[0], status); + + /* clear all PF RAS errors */ + hclge_cmd_reuse_desc(&desc[0], false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + if (ret) + dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); + + return ret; } -static void hclge_process_tm_sch_error(struct hclge_dev *hdev) +static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; - const struct hclge_tm_sch_ecc_info *tm_sch_ecc_info; - struct hclge_desc desc; - u32 ecc_info; - u8 module_no; - u8 ram_no; + u32 mpf_bd_num, pf_bd_num, bd_num; + struct hclge_desc desc_bd; + struct hclge_desc *desc; int ret; - /* read TM scheduler errors */ - ret = hclge_cmd_query_error(hdev, &desc, - HCLGE_TM_SCH_MBIT_ECC_INFO_CMD, 0, 0, 0); + /* query the number of registers in the RAS int status */ + hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_RAS_INT_STS_BD_NUM, + true); + ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); if (ret) { - dev_err(dev, "failed(%d) to read SCH mbit ECC err info\n", ret); - return; + dev_err(dev, "fail(%d) to query ras int status bd num\n", ret); + return ret; } - ecc_info = le32_to_cpu(desc.data[0]); + mpf_bd_num = le32_to_cpu(desc_bd.data[0]); + pf_bd_num = le32_to_cpu(desc_bd.data[1]); + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + + desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; - ret = hclge_cmd_query_error(hdev, &desc, - HCLGE_TM_SCH_ECC_ERR_RINT_CMD, 0, 0, 0); + /* handle all main PF RAS errors */ + ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num); if (ret) { - dev_err(dev, "failed(%d) to read SCH ECC err status\n", ret); - return; + kfree(desc); + return ret; } + memset(desc, 0, bd_num * sizeof(struct hclge_desc)); - /* log TM scheduler errors */ - if (le32_to_cpu(desc.data[0])) { - hclge_log_error(dev, &hclge_tm_sch_err_int[0], - le32_to_cpu(desc.data[0])); - if (le32_to_cpu(desc.data[0]) & 0x2) { - module_no = (ecc_info >> 20) & 0xF; - ram_no = (ecc_info >> 16) & 0xF; - tm_sch_ecc_info = - &hclge_tm_sch_ecc_err[module_no][ram_no]; - dev_warn(dev, "ecc err module:ram=%s\n", - tm_sch_ecc_info->name); - dev_warn(dev, "ecc memory address = 0x%x\n", - ecc_info & 0xFFFF); + /* handle all PF RAS errors */ + ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num); + kfree(desc); + + return ret; +} + +static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + /* read overflow error status */ + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_ROCEE_PF_RAS_INT_CMD, + 0, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); + return ret; + } + + /* log overflow error */ + if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { + const struct hclge_hw_error *err; + u32 err_sts; + + err = &hclge_rocee_qmm_ovf_err_int[0]; + err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK & + le32_to_cpu(desc[0].data[0]); + while (err->msg) { + if (err->int_msk == err_sts) { + dev_warn(dev, "%s [error status=0x%x] found\n", + err->msg, + le32_to_cpu(desc[0].data[0])); + break; + } + err++; } } - /* clear TM scheduler errors */ - ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); - if (ret) { - dev_err(dev, "failed(%d) to clear TM SCH error status\n", ret); - return; + if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { + dev_warn(dev, "ROCEE TSP OVF [error status=0x%x] found\n", + le32_to_cpu(desc[0].data[1])); } - ret = hclge_cmd_query_error(hdev, &desc, - HCLGE_TM_SCH_ECC_ERR_RINT_CE, 0, 0, 0); - if (ret) { - dev_err(dev, "failed(%d) to read SCH CE status\n", ret); - return; + if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { + dev_warn(dev, "ROCEE SCC OVF [error status=0x%x] found\n", + le32_to_cpu(desc[0].data[2])); } - ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + return 0; +} + +static int hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) +{ + enum hnae3_reset_type reset_type = HNAE3_FUNC_RESET; + struct hnae3_ae_dev *ae_dev = hdev->ae_dev; + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + unsigned int status; + int ret; + + /* read RAS error interrupt status */ + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, + 0, 0, 0); if (ret) { - dev_err(dev, "failed(%d) to clear TM SCH CE status\n", ret); - return; + dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret); + /* reset everything for now */ + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + return ret; } - ret = hclge_cmd_query_error(hdev, &desc, - HCLGE_TM_SCH_ECC_ERR_RINT_NFE, 0, 0, 0); - if (ret) { - dev_err(dev, "failed(%d) to read SCH NFE status\n", ret); - return; + status = le32_to_cpu(desc[0].data[0]); + + if (status & HCLGE_ROCEE_RERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI rresp error\n"); + + if (status & HCLGE_ROCEE_BERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI bresp error\n"); + + if (status & HCLGE_ROCEE_ECC_INT_MASK) { + dev_warn(dev, "ROCEE RAS 2bit ECC error\n"); + reset_type = HNAE3_GLOBAL_RESET; } - ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); - if (ret) { - dev_err(dev, "failed(%d) to clear TM SCH NFE status\n", ret); - return; + if (status & HCLGE_ROCEE_OVF_INT_MASK) { + ret = hclge_log_rocee_ovf_error(hdev); + if (ret) { + dev_err(dev, "failed(%d) to process ovf error\n", ret); + /* reset everything for now */ + HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + return ret; + } } - ret = hclge_cmd_query_error(hdev, &desc, - HCLGE_TM_SCH_ECC_ERR_RINT_FE, 0, 0, 0); + /* clear error status */ + hclge_cmd_reuse_desc(&desc[0], false); + ret = hclge_cmd_send(&hdev->hw, &desc[0], 1); if (ret) { - dev_err(dev, "failed(%d) to read SCH FE status\n", ret); - return; + dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret); + /* reset everything for now */ + reset_type = HNAE3_GLOBAL_RESET; } - ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); - if (ret) - dev_err(dev, "failed(%d) to clear TM SCH FE status\n", ret); + HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type); + + return ret; } -static void hclge_process_tm_qcn_error(struct hclge_dev *hdev) +static int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; int ret; - /* read QCN errors */ - ret = hclge_cmd_query_error(hdev, &desc, - HCLGE_TM_QCN_MEM_INT_INFO_CMD, 0, 0, 0); - if (ret) { - dev_err(dev, "failed(%d) to read QCN ECC err status\n", ret); - return; - } + if (hdev->pdev->revision < 0x21 || !hnae3_dev_roce_supported(hdev)) + return 0; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false); + if (en) { + /* enable ROCEE hw error interrupts */ + desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN); + desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN); - /* log QCN errors */ - if (le32_to_cpu(desc.data[0])) - hclge_log_error(dev, &hclge_qcn_ecc_err_int[0], - le32_to_cpu(desc.data[0])); + hclge_log_and_clear_rocee_ras_error(hdev); + } + desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK); + desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK); - /* clear QCN errors */ - ret = hclge_cmd_clear_error(hdev, &desc, NULL, 0, 0); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) - dev_err(dev, "failed(%d) to clear QCN error status\n", ret); + dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret); + + return ret; } -static void hclge_process_tm_error(struct hclge_dev *hdev, - enum hclge_err_int_type type) +static int hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) { - hclge_process_tm_sch_error(hdev); - hclge_process_tm_qcn_error(hdev); + struct hclge_dev *hdev = ae_dev->priv; + + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + hdev->pdev->revision < 0x21) + return HNAE3_NONE_RESET; + + return hclge_log_and_clear_rocee_ras_error(hdev); } static const struct hclge_hw_blk hw_blk[] = { - { .msk = BIT(0), .name = "IGU_EGU", - .enable_error = hclge_enable_igu_egu_error, - .process_error = hclge_process_igu_egu_error, }, - { .msk = BIT(5), .name = "COMMON", - .enable_error = hclge_enable_common_error, - .process_error = hclge_process_common_error, }, - { .msk = BIT(4), .name = "TM", - .enable_error = hclge_enable_tm_hw_error, - .process_error = hclge_process_tm_error, }, - { .msk = BIT(1), .name = "PPP", - .enable_error = hclge_enable_ppp_error, - .process_error = hclge_process_ppp_error, }, + { + .msk = BIT(0), .name = "IGU_EGU", + .config_err_int = hclge_config_igu_egu_hw_err_int, + }, + { + .msk = BIT(1), .name = "PPP", + .config_err_int = hclge_config_ppp_hw_err_int, + }, + { + .msk = BIT(2), .name = "SSU", + .config_err_int = hclge_config_ssu_hw_err_int, + }, + { + .msk = BIT(3), .name = "PPU", + .config_err_int = hclge_config_ppu_hw_err_int, + }, + { + .msk = BIT(4), .name = "TM", + .config_err_int = hclge_config_tm_hw_err_int, + }, + { + .msk = BIT(5), .name = "COMMON", + .config_err_int = hclge_config_common_hw_err_int, + }, + { + .msk = BIT(8), .name = "MAC", + .config_err_int = hclge_config_mac_err_int, + }, { /* sentinel */ } }; int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) { + const struct hclge_hw_blk *module = hw_blk; struct device *dev = &hdev->pdev->dev; int ret = 0; - int i = 0; - while (hw_blk[i].name) { - if (!hw_blk[i].enable_error) { - i++; - continue; - } - ret = hw_blk[i].enable_error(hdev, state); - if (ret) { - dev_err(dev, "fail(%d) to en/disable err int\n", ret); - return ret; + while (module->name) { + if (module->config_err_int) { + ret = module->config_err_int(hdev, state); + if (ret) + return ret; } - i++; + module++; } + ret = hclge_config_rocee_ras_interrupt(hdev, state); + if (ret) + dev_err(dev, "fail(%d) to configure ROCEE err int\n", ret); + return ret; } -pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev) +pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) { struct hclge_dev *hdev = ae_dev->priv; struct device *dev = &hdev->pdev->dev; - u32 sts, val; - int i = 0; - - sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); - - /* Processing Non-fatal errors */ - if (sts & HCLGE_RAS_REG_NFE_MASK) { - val = (sts >> HCLGE_RAS_REG_NFE_SHIFT) & 0xFF; - i = 0; - while (hw_blk[i].name) { - if (!(hw_blk[i].msk & val)) { - i++; - continue; - } - dev_warn(dev, "%s ras non-fatal error identified\n", - hw_blk[i].name); - if (hw_blk[i].process_error) - hw_blk[i].process_error(hdev, - HCLGE_ERR_INT_RAS_NFE); - i++; - } + u32 status; + + status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); + + /* Handling Non-fatal HNS RAS errors */ + if (status & HCLGE_RAS_REG_NFE_MASK) { + dev_warn(dev, + "HNS Non-Fatal RAS error(status=0x%x) identified\n", + status); + hclge_handle_all_ras_errors(hdev); + } else { + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + hdev->pdev->revision < 0x21) + return PCI_ERS_RESULT_RECOVERED; } - return PCI_ERS_RESULT_NEED_RESET; + if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { + dev_warn(dev, "ROCEE uncorrected RAS error identified\n"); + hclge_handle_rocee_ras_error(ae_dev); + } + + if (status & HCLGE_RAS_REG_NFE_MASK || + status & HCLGE_RAS_REG_ROCEE_ERR_MASK) + return PCI_ERS_RESULT_NEED_RESET; + + return PCI_ERS_RESULT_RECOVERED; +} + +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + u32 mpf_bd_num, pf_bd_num, bd_num; + struct hclge_desc desc_bd; + struct hclge_desc *desc; + __le32 *desc_data; + int ret = 0; + u32 status; + + /* set default handling */ + set_bit(HNAE3_FUNC_RESET, reset_requests); + + /* query the number of bds for the MSIx int status */ + hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_MSIX_INT_STS_BD_NUM, + true); + ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); + if (ret) { + dev_err(dev, "fail(%d) to query msix int status bd num\n", + ret); + /* reset everything for now */ + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + return ret; + } + + mpf_bd_num = le32_to_cpu(desc_bd.data[0]); + pf_bd_num = le32_to_cpu(desc_bd.data[1]); + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + + desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + goto out; + + /* query all main PF MSIx errors */ + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num); + if (ret) { + dev_err(dev, "query all mpf msix int cmd failed (%d)\n", + ret); + /* reset everything for now */ + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + goto msi_error; + } + + /* log MAC errors */ + desc_data = (__le32 *)&desc[1]; + status = le32_to_cpu(*desc_data); + if (status) { + hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", + &hclge_mac_afifo_tnl_int[0], status); + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + } + + /* log PPU(RCB) errors */ + desc_data = (__le32 *)&desc[5]; + status = le32_to_cpu(*(desc_data + 2)) & + HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; + if (status) { + dev_warn(dev, + "PPU_MPF_ABNORMAL_INT_ST2[28:29], err_status(0x%x)\n", + status); + set_bit(HNAE3_CORE_RESET, reset_requests); + } + + /* clear all main PF MSIx errors */ + hclge_cmd_reuse_desc(&desc[0], false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num); + if (ret) { + dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", + ret); + /* reset everything for now */ + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + goto msi_error; + } + + /* query all PF MSIx errors */ + memset(desc, 0, bd_num * sizeof(struct hclge_desc)); + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); + if (ret) { + dev_err(dev, "query all pf msix int cmd failed (%d)\n", + ret); + /* reset everything for now */ + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + goto msi_error; + } + + /* log SSU PF errors */ + status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; + if (status) { + hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", + &hclge_ssu_port_based_pf_int[0], status); + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + } + + /* read and log PPP PF errors */ + desc_data = (__le32 *)&desc[2]; + status = le32_to_cpu(*desc_data); + if (status) + hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", + &hclge_ppp_pf_abnormal_int[0], status); + + /* PPU(RCB) PF errors */ + desc_data = (__le32 *)&desc[3]; + status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; + if (status) + hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", + &hclge_ppu_pf_abnormal_int[0], status); + + /* clear all PF MSIx errors */ + hclge_cmd_reuse_desc(&desc[0], false); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); + if (ret) { + dev_err(dev, "clear all pf msix int cmd failed (%d)\n", + ret); + /* reset everything for now */ + set_bit(HNAE3_GLOBAL_RESET, reset_requests); + } + +msi_error: + kfree(desc); +out: + return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index e0e3b5861495..51a7d4eb066a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -7,9 +7,11 @@ #include "hclge_main.h" #define HCLGE_RAS_PF_OTHER_INT_STS_REG 0x20B00 -#define HCLGE_RAS_REG_FE_MASK 0xFF #define HCLGE_RAS_REG_NFE_MASK 0xFF00 -#define HCLGE_RAS_REG_NFE_SHIFT 8 +#define HCLGE_RAS_REG_ROCEE_ERR_MASK 0x3000000 + +#define HCLGE_VECTOR0_PF_OTHER_INT_STS_REG 0x20800 +#define HCLGE_VECTOR0_REG_MSIX_MASK 0x1FF00 #define HCLGE_IMP_TCM_ECC_ERR_INT_EN 0xFFFF0000 #define HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK 0xFFFF0000 @@ -23,6 +25,8 @@ #define HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK 0x0100 #define HCLGE_TQP_ECC_ERR_INT_EN 0x0FFF #define HCLGE_TQP_ECC_ERR_INT_EN_MASK 0x0FFF +#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK 0x0F000000 +#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN 0x0F000000 #define HCLGE_IGU_ERR_INT_EN 0x0000066F #define HCLGE_IGU_ERR_INT_EN_MASK 0x000F #define HCLGE_IGU_TNL_ERR_INT_EN 0x0002AABF @@ -41,21 +45,55 @@ #define HCLGE_TM_QCN_MEM_ERR_INT_EN 0xFFFFFF #define HCLGE_NCSI_ERR_INT_EN 0x3 #define HCLGE_NCSI_ERR_INT_TYPE 0x9 +#define HCLGE_MAC_COMMON_ERR_INT_EN GENMASK(7, 0) +#define HCLGE_MAC_COMMON_ERR_INT_EN_MASK GENMASK(7, 0) +#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN GENMASK(31, 0) +#define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK GENMASK(31, 0) +#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN GENMASK(31, 0) +#define HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK GENMASK(31, 0) +#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN 0x3FFF3FFF +#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK 0x3FFF3FFF +#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN2 0xB +#define HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK 0xB +#define HCLGE_PPU_MPF_ABNORMAL_INT3_EN GENMASK(7, 0) +#define HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK GENMASK(23, 16) +#define HCLGE_PPU_PF_ABNORMAL_INT_EN GENMASK(5, 0) +#define HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK GENMASK(5, 0) +#define HCLGE_SSU_1BIT_ECC_ERR_INT_EN GENMASK(31, 0) +#define HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK GENMASK(31, 0) +#define HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN GENMASK(31, 0) +#define HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK GENMASK(31, 0) +#define HCLGE_SSU_BIT32_ECC_ERR_INT_EN 0x0101 +#define HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK 0x0101 +#define HCLGE_SSU_COMMON_INT_EN GENMASK(9, 0) +#define HCLGE_SSU_COMMON_INT_EN_MASK GENMASK(9, 0) +#define HCLGE_SSU_PORT_BASED_ERR_INT_EN 0x0BFF +#define HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK 0x0BFF0000 +#define HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN GENMASK(23, 0) +#define HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK GENMASK(23, 0) + +#define HCLGE_SSU_COMMON_ERR_INT_MASK GENMASK(9, 0) +#define HCLGE_SSU_PORT_INT_MSIX_MASK 0x7BFF +#define HCLGE_IGU_INT_MASK GENMASK(3, 0) +#define HCLGE_IGU_EGU_TNL_INT_MASK GENMASK(5, 0) +#define HCLGE_PPP_MPF_INT_ST3_MASK GENMASK(5, 0) +#define HCLGE_PPU_MPF_INT_ST3_MASK GENMASK(7, 0) +#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK GENMASK(29, 28) +#define HCLGE_PPU_PF_INT_MSIX_MASK 0x27 +#define HCLGE_QCN_FIFO_INT_MASK GENMASK(17, 0) +#define HCLGE_QCN_ECC_INT_MASK GENMASK(21, 0) +#define HCLGE_NCSI_ECC_INT_MASK GENMASK(1, 0) -#define HCLGE_IMP_TCM_ECC_INT_MASK 0xFFFF -#define HCLGE_IMP_ITCM4_ECC_INT_MASK 0x3 -#define HCLGE_CMDQ_ECC_INT_MASK 0xFFFF -#define HCLGE_CMDQ_ROC_ECC_INT_SHIFT 16 -#define HCLGE_TQP_ECC_INT_MASK 0xFFF -#define HCLGE_TQP_ECC_INT_SHIFT 16 -#define HCLGE_IMP_TCM_ECC_CLR_MASK 0xFFFF -#define HCLGE_IMP_ITCM4_ECC_CLR_MASK 0x3 -#define HCLGE_CMDQ_NIC_ECC_CLR_MASK 0xFFFF -#define HCLGE_CMDQ_ROCEE_ECC_CLR_MASK 0xFFFF0000 -#define HCLGE_TQP_IMP_ERR_CLR_MASK 0x0FFF0001 -#define HCLGE_IGU_COM_INT_MASK 0xF -#define HCLGE_IGU_EGU_TNL_INT_MASK 0x3F -#define HCLGE_PPP_PF_INT_MASK 0x100 +#define HCLGE_ROCEE_RAS_NFE_INT_EN 0xF +#define HCLGE_ROCEE_RAS_CE_INT_EN 0x1 +#define HCLGE_ROCEE_RAS_NFE_INT_EN_MASK 0xF +#define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1 +#define HCLGE_ROCEE_RERR_INT_MASK BIT(0) +#define HCLGE_ROCEE_BERR_INT_MASK BIT(1) +#define HCLGE_ROCEE_ECC_INT_MASK BIT(2) +#define HCLGE_ROCEE_OVF_INT_MASK BIT(3) +#define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000 +#define HCLGE_ROCEE_OVF_ERR_TYPE_MASK 0x3F enum hclge_err_int_type { HCLGE_ERR_INT_MSIX = 0, @@ -67,9 +105,7 @@ enum hclge_err_int_type { struct hclge_hw_blk { u32 msk; const char *name; - int (*enable_error)(struct hclge_dev *hdev, bool en); - void (*process_error)(struct hclge_dev *hdev, - enum hclge_err_int_type type); + int (*config_err_int)(struct hclge_dev *hdev, bool en); }; struct hclge_hw_error { @@ -78,6 +114,7 @@ struct hclge_hw_error { }; int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); -int hclge_enable_tm_hw_error(struct hclge_dev *hdev, bool en); -pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev); +pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev); +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 8432c841aa4f..22380202ba83 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2200,12 +2200,13 @@ static void hclge_service_complete(struct hclge_dev *hdev) static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) { - u32 rst_src_reg; - u32 cmdq_src_reg; + u32 rst_src_reg, cmdq_src_reg, msix_src_reg; /* fetch the events from their corresponding regs */ rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS); cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG); + msix_src_reg = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); /* Assumption: If by any chance reset and mailbox events are reported * together then we will only process reset event in this go and will @@ -2239,6 +2240,10 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) return HCLGE_VECTOR0_EVENT_RST; } + /* check for vector0 msix event source */ + if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) + return HCLGE_VECTOR0_EVENT_ERR; + /* check for vector0 mailbox(=CMDQ RX) event source */ if (BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B) & cmdq_src_reg) { cmdq_src_reg &= ~BIT(HCLGE_VECTOR0_RX_CMDQ_INT_B); @@ -2289,6 +2294,19 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) /* vector 0 interrupt is shared with reset and mailbox source events.*/ switch (event_cause) { + case HCLGE_VECTOR0_EVENT_ERR: + /* we do not know what type of reset is required now. This could + * only be decided after we fetch the type of errors which + * caused this event. Therefore, we will do below for now: + * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we + * have defered type of reset to be used. + * 2. Schedule the reset serivce task. + * 3. When service task receives HNAE3_UNKNOWN_RESET type it + * will fetch the correct type of reset. This would be done + * by first decoding the types of errors. + */ + set_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request); + /* fall through */ case HCLGE_VECTOR0_EVENT_RST: hclge_reset_task_schedule(hdev); break; @@ -2593,6 +2611,23 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; + /* first, resolve any unknown reset type to the known type(s) */ + if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { + /* we will intentionally ignore any errors from this function + * as we will end up in *some* reset request in any case + */ + hclge_handle_hw_msix_error(hdev, addr); + clear_bit(HNAE3_UNKNOWN_RESET, addr); + /* We defered the clearing of the error event which caused + * interrupt since it was not posssible to do that in + * interrupt context (and this is the reason we introduced + * new UNKNOWN reset type). Now, the errors have been + * handled and cleared in hardware we can safely enable + * interrupts. This is an exception to the norm. + */ + hclge_enable_vector(&hdev->misc_vector, true); + } + /* return the highest priority reset level amongst all */ if (test_bit(HNAE3_IMP_RESET, addr)) { rst_level = HNAE3_IMP_RESET; @@ -7269,7 +7304,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_hw_error_set_state(hdev, true); if (ret) { dev_err(&pdev->dev, - "hw error interrupts enable failed, ret =%d\n", ret); + "fail(%d) to enable hw error interrupts\n", ret); goto err_mdiobus_unreg; } @@ -7405,11 +7440,15 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) return ret; } - /* Re-enable the TM hw error interrupts because - * they get disabled on core/global reset. + /* Re-enable the hw error interrupts because + * the interrupts get disabled on core/global reset. */ - if (hclge_enable_tm_hw_error(hdev, true)) - dev_err(&pdev->dev, "failed to enable TM hw error interrupts\n"); + ret = hclge_hw_error_set_state(hdev, true); + if (ret) { + dev_err(&pdev->dev, + "fail(%d) to re-enable HNS hw error interrupts\n", ret); + return ret; + } hclge_reset_vport_state(hdev); @@ -7931,7 +7970,7 @@ static const struct hnae3_ae_ops hclge_ops = { .restore_fd_rules = hclge_restore_fd_entries, .enable_fd = hclge_enable_fd, .dbg_run_cmd = hclge_dbg_run_cmd, - .process_hw_error = hclge_process_ras_hw_error, + .handle_hw_ras_error = hclge_handle_hw_ras_error, .get_hw_reset_stat = hclge_get_hw_reset_stat, .ae_dev_resetting = hclge_ae_dev_resetting, .ae_dev_reset_cnt = hclge_ae_dev_reset_cnt, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index ee6024270d5a..106fce172563 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -205,6 +205,7 @@ enum HCLGE_DEV_STATE { enum hclge_evt_cause { HCLGE_VECTOR0_EVENT_RST, HCLGE_VECTOR0_EVENT_MBX, + HCLGE_VECTOR0_EVENT_ERR, HCLGE_VECTOR0_EVENT_OTHER, }; |