summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c139
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c111
3 files changed, 270 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 6999f4486e9e..8a4930c8bf62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -243,6 +243,19 @@ free_strings_db:
return -ENOMEM;
}
+static void
+mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+ tracer->st_arr.saved_traces_index = 0;
+ mutex_init(&tracer->st_arr.lock);
+}
+
+static void
+mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer)
+{
+ mutex_destroy(&tracer->st_arr.lock);
+}
+
static void mlx5_tracer_read_strings_db(struct work_struct *work)
{
struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
@@ -522,6 +535,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
list_del(&str_frmt->list);
}
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
+ u64 timestamp, bool lost,
+ u8 event_id, char *msg)
+{
+ struct mlx5_fw_trace_data *trace_data;
+
+ mutex_lock(&tracer->st_arr.lock);
+ trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index];
+ trace_data->timestamp = timestamp;
+ trace_data->lost = lost;
+ trace_data->event_id = event_id;
+ strncpy(trace_data->msg, msg, TRACE_STR_MSG);
+
+ tracer->st_arr.saved_traces_index =
+ (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
+ mutex_unlock(&tracer->st_arr.lock);
+}
+
static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
struct mlx5_core_dev *dev,
u64 trace_timestamp)
@@ -540,6 +571,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
str_frmt->event_id, tmp);
+ mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
+ str_frmt->lost, str_frmt->event_id, tmp);
+
/* remove it from hash */
mlx5_tracer_clean_message(str_frmt);
}
@@ -786,6 +820,109 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work)
mlx5_fw_tracer_start(tracer);
}
+static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev,
+ u32 *in, int size_in)
+{
+ u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) &&
+ !MLX5_CAP_DEBUG(dev, core_dump_qp))
+ return -EOPNOTSUPP;
+
+ return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out),
+ MLX5_REG_CORE_DUMP, 0, 1);
+}
+
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
+{
+ struct mlx5_fw_tracer *tracer = dev->tracer;
+ u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {};
+ int err;
+
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer)
+ return -EOPNOTSUPP;
+ if (!tracer->owner)
+ return -EPERM;
+
+ MLX5_SET(core_dump_reg, in, core_dump_type, 0x0);
+
+ err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in));
+ if (err)
+ return err;
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ flush_workqueue(tracer->work_queue);
+ return 0;
+}
+
+static int
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
+ struct mlx5_fw_trace_data *trace_data)
+{
+ int err;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+ return 0;
+}
+
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_fw_trace_data *straces = tracer->st_arr.straces;
+ u32 index, start_index, end_index;
+ u32 saved_traces_index;
+ int err;
+
+ if (!straces[0].timestamp)
+ return -ENOMSG;
+
+ mutex_lock(&tracer->st_arr.lock);
+ saved_traces_index = tracer->st_arr.saved_traces_index;
+ if (straces[saved_traces_index].timestamp)
+ start_index = saved_traces_index;
+ else
+ start_index = 0;
+ end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1);
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces");
+ if (err)
+ goto unlock;
+ index = start_index;
+ while (index != end_index) {
+ err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]);
+ if (err)
+ goto unlock;
+
+ index = (index + 1) & (SAVED_TRACES_NUM - 1);
+ }
+
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+ mutex_unlock(&tracer->st_arr.lock);
+ return err;
+}
+
/* Create software resources (Buffers, etc ..) */
struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
{
@@ -833,6 +970,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
goto free_log_buf;
}
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
return tracer;
@@ -917,6 +1055,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
cancel_work_sync(&tracer->read_fw_strings_work);
mlx5_fw_tracer_clean_ready_list(tracer);
mlx5_fw_tracer_clean_print_hash(tracer);
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
mlx5_fw_tracer_free_strings_db(tracer);
mlx5_fw_tracer_destroy_log_buf(tracer);
flush_workqueue(tracer->work_queue);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index a8b8747f2b61..40601fba80ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -46,6 +46,9 @@
#define TRACER_BLOCK_SIZE_BYTE 256
#define TRACES_PER_BLOCK 32
+#define TRACE_STR_MSG 256
+#define SAVED_TRACES_NUM 8192
+
#define TRACER_MAX_PARAMS 7
#define MESSAGE_HASH_BITS 6
#define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
@@ -53,6 +56,13 @@
#define MASK_52_7 (0x1FFFFFFFFFFF80)
#define MASK_6_0 (0x7F)
+struct mlx5_fw_trace_data {
+ u64 timestamp;
+ bool lost;
+ u8 event_id;
+ char msg[TRACE_STR_MSG];
+};
+
struct mlx5_fw_tracer {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
@@ -83,6 +93,13 @@ struct mlx5_fw_tracer {
u32 consumer_index;
} buff;
+ /* Saved Traces Array */
+ struct {
+ struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM];
+ u32 saved_traces_index;
+ struct mutex lock; /* Protect st_arr access */
+ } st_arr;
+
u64 last_timestamp;
struct work_struct handle_traces_work;
struct hlist_head hash[MESSAGE_HASH_SIZE];
@@ -171,5 +188,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+ struct devlink_fmsg *fmsg);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 973cc005ae60..1c20d3f1d238 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -41,6 +41,7 @@
#include "lib/eq.h"
#include "lib/mlx5.h"
#include "lib/pci_vsc.h"
+#include "diag/fw_tracer.h"
enum {
MLX5_HEALTH_POLL_INTERVAL = 2 * HZ,
@@ -405,9 +406,119 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
}
+struct mlx5_fw_reporter_ctx {
+ u8 err_synd;
+ int miss_counter;
+};
+
+static int
+mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
+{
+ int err;
+
+ err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
+ fw_reporter_ctx->err_synd);
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
+ fw_reporter_ctx->miss_counter);
+ if (err)
+ return err;
+ return 0;
+}
+
+static int
+mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
+ struct devlink_fmsg *fmsg)
+{
+ struct mlx5_core_health *health = &dev->priv.health;
+ struct health_buffer __iomem *h = health->health;
+ int err;
+ int i;
+
+ if (!ioread8(&h->synd))
+ return 0;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
+ if (err)
+ return err;
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ return err;
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
+ err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
+ if (err)
+ return err;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
+ ioread32be(&h->assert_exit_ptr));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
+ ioread32be(&h->assert_callra));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
+ ioread8(&h->irisc_index));
+ if (err)
+ return err;
+ err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
+ ioread16be(&h->ext_synd));
+ if (err)
+ return err;
+ err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
+ ioread32be(&h->fw_ver));
+ if (err)
+ return err;
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ return err;
+ return devlink_fmsg_pair_nest_end(fmsg);
+}
+
+static int
+mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+ struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+ int err;
+
+ err = mlx5_fw_tracer_trigger_core_dump_general(dev);
+ if (err)
+ return err;
+
+ if (priv_ctx) {
+ struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+
+ err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+ if (err)
+ return err;
+ }
+
+ err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg);
+ if (err)
+ return err;
+ return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
+}
+
static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
.name = "fw",
.diagnose = mlx5_fw_reporter_diagnose,
+ .dump = mlx5_fw_reporter_dump,
};
static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev)