forked from ~ljy/RK356X_SDK_RELEASE

hc
2023-12-09 b22da3d8526a935aa31e086e63f60ff3246cb61c
kernel/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
....@@ -30,6 +30,7 @@
3030 * SOFTWARE.
3131 */
3232 #define CREATE_TRACE_POINTS
33
+#include "lib/eq.h"
3334 #include "fw_tracer.h"
3435 #include "fw_tracer_tracepoint.h"
3536
....@@ -123,7 +124,7 @@
123124 static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
124125 {
125126 struct mlx5_core_dev *dev = tracer->dev;
126
- struct device *ddev = &dev->pdev->dev;
127
+ struct device *ddev;
127128 dma_addr_t dma;
128129 void *buff;
129130 gfp_t gfp;
....@@ -141,6 +142,7 @@
141142 }
142143 tracer->buff.log_buf = buff;
143144
145
+ ddev = mlx5_core_dma_dev(dev);
144146 dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
145147 if (dma_mapping_error(ddev, dma)) {
146148 mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
....@@ -161,11 +163,12 @@
161163 static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
162164 {
163165 struct mlx5_core_dev *dev = tracer->dev;
164
- struct device *ddev = &dev->pdev->dev;
166
+ struct device *ddev;
165167
166168 if (!tracer->buff.log_buf)
167169 return;
168170
171
+ ddev = mlx5_core_dma_dev(dev);
169172 dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
170173 free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
171174 }
....@@ -187,7 +190,7 @@
187190
188191 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
189192 DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
190
- mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
193
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
191194 for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
192195 mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
193196
....@@ -240,6 +243,19 @@
240243 free_strings_db:
241244 mlx5_fw_tracer_free_strings_db(tracer);
242245 return -ENOMEM;
246
+}
247
+
248
+static void
249
+mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer)
250
+{
251
+ tracer->st_arr.saved_traces_index = 0;
252
+ mutex_init(&tracer->st_arr.lock);
253
+}
254
+
255
+static void
256
+mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer)
257
+{
258
+ mutex_destroy(&tracer->st_arr.lock);
243259 }
244260
245261 static void mlx5_tracer_read_strings_db(struct work_struct *work)
....@@ -521,9 +537,28 @@
521537 list_del(&str_frmt->list);
522538 }
523539
524
-static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
525
- struct mlx5_core_dev *dev,
526
- u64 trace_timestamp)
540
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
541
+ u64 timestamp, bool lost,
542
+ u8 event_id, char *msg)
543
+{
544
+ struct mlx5_fw_trace_data *trace_data;
545
+
546
+ mutex_lock(&tracer->st_arr.lock);
547
+ trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index];
548
+ trace_data->timestamp = timestamp;
549
+ trace_data->lost = lost;
550
+ trace_data->event_id = event_id;
551
+ strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG);
552
+
553
+ tracer->st_arr.saved_traces_index =
554
+ (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
555
+ mutex_unlock(&tracer->st_arr.lock);
556
+}
557
+
558
+static noinline
559
+void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
560
+ struct mlx5_core_dev *dev,
561
+ u64 trace_timestamp)
527562 {
528563 char tmp[512];
529564
....@@ -538,6 +573,9 @@
538573
539574 trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
540575 str_frmt->event_id, tmp);
576
+
577
+ mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
578
+ str_frmt->lost, str_frmt->event_id, tmp);
541579
542580 /* remove it from hash */
543581 mlx5_tracer_clean_message(str_frmt);
....@@ -600,7 +638,7 @@
600638 trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
601639 (str_frmt->timestamp & MASK_6_0);
602640 else
603
- trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
641
+ trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) |
604642 (str_frmt->timestamp & MASK_6_0);
605643
606644 mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
....@@ -637,10 +675,13 @@
637675 if (!tracer->owner)
638676 return;
639677
678
+ if (unlikely(!tracer->str_db.loaded))
679
+ goto arm;
680
+
640681 block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
641682 start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
642683
643
- /* Copy the block to local buffer to avoid HW override while being processed*/
684
+ /* Copy the block to local buffer to avoid HW override while being processed */
644685 memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
645686 TRACER_BLOCK_SIZE_BYTE);
646687
....@@ -648,7 +689,7 @@
648689 get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
649690
650691 while (block_timestamp > tracer->last_timestamp) {
651
- /* Check block override if its not the first block */
692
+ /* Check block override if it's not the first block */
652693 if (!tracer->last_timestamp) {
653694 u64 *ts_event;
654695 /* To avoid block override by the HW in case of buffer
....@@ -694,6 +735,7 @@
694735 &tmp_trace_block[TRACES_PER_BLOCK - 1]);
695736 }
696737
738
+arm:
697739 mlx5_fw_tracer_arm(dev);
698740 }
699741
....@@ -785,6 +827,109 @@
785827 mlx5_fw_tracer_start(tracer);
786828 }
787829
830
+static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev,
831
+ u32 *in, int size_in)
832
+{
833
+ u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {};
834
+
835
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) &&
836
+ !MLX5_CAP_DEBUG(dev, core_dump_qp))
837
+ return -EOPNOTSUPP;
838
+
839
+ return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out),
840
+ MLX5_REG_CORE_DUMP, 0, 1);
841
+}
842
+
843
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
844
+{
845
+ struct mlx5_fw_tracer *tracer = dev->tracer;
846
+ u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {};
847
+ int err;
848
+
849
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer)
850
+ return -EOPNOTSUPP;
851
+ if (!tracer->owner)
852
+ return -EPERM;
853
+
854
+ MLX5_SET(core_dump_reg, in, core_dump_type, 0x0);
855
+
856
+ err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in));
857
+ if (err)
858
+ return err;
859
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
860
+ flush_workqueue(tracer->work_queue);
861
+ return 0;
862
+}
863
+
864
+static int
865
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
866
+ struct mlx5_fw_trace_data *trace_data)
867
+{
868
+ int err;
869
+
870
+ err = devlink_fmsg_obj_nest_start(fmsg);
871
+ if (err)
872
+ return err;
873
+
874
+ err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp);
875
+ if (err)
876
+ return err;
877
+
878
+ err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost);
879
+ if (err)
880
+ return err;
881
+
882
+ err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id);
883
+ if (err)
884
+ return err;
885
+
886
+ err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg);
887
+ if (err)
888
+ return err;
889
+
890
+ err = devlink_fmsg_obj_nest_end(fmsg);
891
+ if (err)
892
+ return err;
893
+ return 0;
894
+}
895
+
896
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
897
+ struct devlink_fmsg *fmsg)
898
+{
899
+ struct mlx5_fw_trace_data *straces = tracer->st_arr.straces;
900
+ u32 index, start_index, end_index;
901
+ u32 saved_traces_index;
902
+ int err;
903
+
904
+ if (!straces[0].timestamp)
905
+ return -ENOMSG;
906
+
907
+ mutex_lock(&tracer->st_arr.lock);
908
+ saved_traces_index = tracer->st_arr.saved_traces_index;
909
+ if (straces[saved_traces_index].timestamp)
910
+ start_index = saved_traces_index;
911
+ else
912
+ start_index = 0;
913
+ end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1);
914
+
915
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces");
916
+ if (err)
917
+ goto unlock;
918
+ index = start_index;
919
+ while (index != end_index) {
920
+ err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]);
921
+ if (err)
922
+ goto unlock;
923
+
924
+ index = (index + 1) & (SAVED_TRACES_NUM - 1);
925
+ }
926
+
927
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
928
+unlock:
929
+ mutex_unlock(&tracer->st_arr.lock);
930
+ return err;
931
+}
932
+
788933 /* Create software resources (Buffers, etc ..) */
789934 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
790935 {
....@@ -832,6 +977,7 @@
832977 goto free_log_buf;
833978 }
834979
980
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
835981 mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
836982
837983 return tracer;
....@@ -846,9 +992,9 @@
846992 return ERR_PTR(err);
847993 }
848994
849
-/* Create HW resources + start tracer
850
- * must be called before Async EQ is created
851
- */
995
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
996
+
997
+/* Create HW resources + start tracer */
852998 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
853999 {
8541000 struct mlx5_core_dev *dev;
....@@ -865,7 +1011,7 @@
8651011 err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
8661012 if (err) {
8671013 mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
868
- return err;
1014
+ goto err_cancel_work;
8691015 }
8701016
8711017 err = mlx5_fw_tracer_create_mkey(tracer);
....@@ -874,18 +1020,27 @@
8741020 goto err_dealloc_pd;
8751021 }
8761022
877
- mlx5_fw_tracer_start(tracer);
1023
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
1024
+ mlx5_eq_notifier_register(dev, &tracer->nb);
8781025
1026
+ err = mlx5_fw_tracer_start(tracer);
1027
+ if (err) {
1028
+ mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err);
1029
+ goto err_notifier_unregister;
1030
+ }
8791031 return 0;
8801032
1033
+err_notifier_unregister:
1034
+ mlx5_eq_notifier_unregister(dev, &tracer->nb);
1035
+ mlx5_core_destroy_mkey(dev, &tracer->buff.mkey);
8811036 err_dealloc_pd:
8821037 mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
1038
+err_cancel_work:
1039
+ cancel_work_sync(&tracer->read_fw_strings_work);
8831040 return err;
8841041 }
8851042
886
-/* Stop tracer + Cleanup HW resources
887
- * must be called after Async EQ is destroyed
888
- */
1043
+/* Stop tracer + Cleanup HW resources */
8891044 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
8901045 {
8911046 if (IS_ERR_OR_NULL(tracer))
....@@ -893,7 +1048,7 @@
8931048
8941049 mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
8951050 tracer->owner);
896
-
1051
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
8971052 cancel_work_sync(&tracer->ownership_change_work);
8981053 cancel_work_sync(&tracer->handle_traces_work);
8991054
....@@ -915,6 +1070,7 @@
9151070 cancel_work_sync(&tracer->read_fw_strings_work);
9161071 mlx5_fw_tracer_clean_ready_list(tracer);
9171072 mlx5_fw_tracer_clean_print_hash(tracer);
1073
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
9181074 mlx5_fw_tracer_free_strings_db(tracer);
9191075 mlx5_fw_tracer_destroy_log_buf(tracer);
9201076 flush_workqueue(tracer->work_queue);
....@@ -922,12 +1078,63 @@
9221078 kvfree(tracer);
9231079 }
9241080
925
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
1081
+static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer)
9261082 {
927
- struct mlx5_fw_tracer *tracer = dev->tracer;
1083
+ struct mlx5_core_dev *dev;
1084
+ int err;
9281085
929
- if (!tracer)
930
- return;
1086
+ cancel_work_sync(&tracer->read_fw_strings_work);
1087
+ mlx5_fw_tracer_clean_ready_list(tracer);
1088
+ mlx5_fw_tracer_clean_print_hash(tracer);
1089
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
1090
+ mlx5_fw_tracer_free_strings_db(tracer);
1091
+
1092
+ dev = tracer->dev;
1093
+ err = mlx5_query_mtrc_caps(tracer);
1094
+ if (err) {
1095
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
1096
+ return err;
1097
+ }
1098
+
1099
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
1100
+ if (err) {
1101
+ mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err);
1102
+ return err;
1103
+ }
1104
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
1105
+
1106
+ return 0;
1107
+}
1108
+
1109
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
1110
+{
1111
+ struct mlx5_core_dev *dev;
1112
+ int err;
1113
+
1114
+ if (IS_ERR_OR_NULL(tracer))
1115
+ return -EINVAL;
1116
+
1117
+ dev = tracer->dev;
1118
+ mlx5_fw_tracer_cleanup(tracer);
1119
+ err = mlx5_fw_tracer_recreate_strings_db(tracer);
1120
+ if (err) {
1121
+ mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n");
1122
+ return err;
1123
+ }
1124
+ err = mlx5_fw_tracer_init(tracer);
1125
+ if (err) {
1126
+ mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n");
1127
+ return err;
1128
+ }
1129
+
1130
+ return 0;
1131
+}
1132
+
1133
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
1134
+{
1135
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
1136
+ struct mlx5_core_dev *dev = tracer->dev;
1137
+ struct mlx5_eqe *eqe = data;
9311138
9321139 switch (eqe->sub_type) {
9331140 case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
....@@ -935,13 +1142,14 @@
9351142 queue_work(tracer->work_queue, &tracer->ownership_change_work);
9361143 break;
9371144 case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
938
- if (likely(tracer->str_db.loaded))
939
- queue_work(tracer->work_queue, &tracer->handle_traces_work);
1145
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
9401146 break;
9411147 default:
9421148 mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
9431149 eqe->sub_type);
9441150 }
1151
+
1152
+ return NOTIFY_OK;
9451153 }
9461154
9471155 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);