forked from ~ljy/RK356X_SDK_RELEASE

hc
2024-05-10 9999e48639b3cecb08ffb37358bcba3b48161b29
kernel/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
....@@ -30,6 +30,7 @@
3030 * SOFTWARE.
3131 */
3232 #define CREATE_TRACE_POINTS
33
+#include "lib/eq.h"
3334 #include "fw_tracer.h"
3435 #include "fw_tracer_tracepoint.h"
3536
....@@ -63,6 +64,7 @@
6364 MLX5_GET(mtrc_cap, out, num_string_trace);
6465 tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db);
6566 tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner);
67
+ tracer->str_db.loaded = false;
6668
6769 for (i = 0; i < tracer->str_db.num_string_db; i++) {
6870 mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]);
....@@ -123,7 +125,7 @@
123125 static int mlx5_fw_tracer_create_log_buf(struct mlx5_fw_tracer *tracer)
124126 {
125127 struct mlx5_core_dev *dev = tracer->dev;
126
- struct device *ddev = &dev->pdev->dev;
128
+ struct device *ddev;
127129 dma_addr_t dma;
128130 void *buff;
129131 gfp_t gfp;
....@@ -141,6 +143,7 @@
141143 }
142144 tracer->buff.log_buf = buff;
143145
146
+ ddev = mlx5_core_dma_dev(dev);
144147 dma = dma_map_single(ddev, buff, tracer->buff.size, DMA_FROM_DEVICE);
145148 if (dma_mapping_error(ddev, dma)) {
146149 mlx5_core_warn(dev, "FWTracer: Unable to map DMA: %d\n",
....@@ -161,11 +164,12 @@
161164 static void mlx5_fw_tracer_destroy_log_buf(struct mlx5_fw_tracer *tracer)
162165 {
163166 struct mlx5_core_dev *dev = tracer->dev;
164
- struct device *ddev = &dev->pdev->dev;
167
+ struct device *ddev;
165168
166169 if (!tracer->buff.log_buf)
167170 return;
168171
172
+ ddev = mlx5_core_dma_dev(dev);
169173 dma_unmap_single(ddev, tracer->buff.dma, tracer->buff.size, DMA_FROM_DEVICE);
170174 free_pages((unsigned long)tracer->buff.log_buf, get_order(tracer->buff.size));
171175 }
....@@ -187,7 +191,7 @@
187191
188192 MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
189193 DIV_ROUND_UP(TRACER_BUFFER_PAGE_NUM, 2));
190
- mtt = (u64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
194
+ mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
191195 for (i = 0 ; i < TRACER_BUFFER_PAGE_NUM ; i++)
192196 mtt[i] = cpu_to_be64(tracer->buff.dma + i * PAGE_SIZE);
193197
....@@ -240,6 +244,19 @@
240244 free_strings_db:
241245 mlx5_fw_tracer_free_strings_db(tracer);
242246 return -ENOMEM;
247
+}
248
+
249
+static void
250
+mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer)
251
+{
252
+ tracer->st_arr.saved_traces_index = 0;
253
+ mutex_init(&tracer->st_arr.lock);
254
+}
255
+
256
+static void
257
+mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer)
258
+{
259
+ mutex_destroy(&tracer->st_arr.lock);
243260 }
244261
245262 static void mlx5_tracer_read_strings_db(struct work_struct *work)
....@@ -466,7 +483,7 @@
466483 (u64)timestamp_low;
467484 break;
468485 default:
469
- if (tracer_event->event_id >= tracer->str_db.first_string_trace ||
486
+ if (tracer_event->event_id >= tracer->str_db.first_string_trace &&
470487 tracer_event->event_id <= tracer->str_db.first_string_trace +
471488 tracer->str_db.num_string_trace) {
472489 tracer_event->type = TRACER_EVENT_TYPE_STRING;
....@@ -521,9 +538,28 @@
521538 list_del(&str_frmt->list);
522539 }
523540
524
-static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
525
- struct mlx5_core_dev *dev,
526
- u64 trace_timestamp)
541
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
542
+ u64 timestamp, bool lost,
543
+ u8 event_id, char *msg)
544
+{
545
+ struct mlx5_fw_trace_data *trace_data;
546
+
547
+ mutex_lock(&tracer->st_arr.lock);
548
+ trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index];
549
+ trace_data->timestamp = timestamp;
550
+ trace_data->lost = lost;
551
+ trace_data->event_id = event_id;
552
+ strscpy_pad(trace_data->msg, msg, TRACE_STR_MSG);
553
+
554
+ tracer->st_arr.saved_traces_index =
555
+ (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1);
556
+ mutex_unlock(&tracer->st_arr.lock);
557
+}
558
+
559
+static noinline
560
+void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
561
+ struct mlx5_core_dev *dev,
562
+ u64 trace_timestamp)
527563 {
528564 char tmp[512];
529565
....@@ -538,6 +574,9 @@
538574
539575 trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
540576 str_frmt->event_id, tmp);
577
+
578
+ mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
579
+ str_frmt->lost, str_frmt->event_id, tmp);
541580
542581 /* remove it from hash */
543582 mlx5_tracer_clean_message(str_frmt);
....@@ -564,7 +603,7 @@
564603 } else {
565604 cur_string = mlx5_tracer_message_get(tracer, tracer_event);
566605 if (!cur_string) {
567
- pr_debug("%s Got string event for unknown string tdsm: %d\n",
606
+ pr_debug("%s Got string event for unknown string tmsn: %d\n",
568607 __func__, tracer_event->string_event.tmsn);
569608 return -1;
570609 }
....@@ -600,7 +639,7 @@
600639 trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
601640 (str_frmt->timestamp & MASK_6_0);
602641 else
603
- trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
642
+ trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) |
604643 (str_frmt->timestamp & MASK_6_0);
605644
606645 mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
....@@ -637,10 +676,13 @@
637676 if (!tracer->owner)
638677 return;
639678
679
+ if (unlikely(!tracer->str_db.loaded))
680
+ goto arm;
681
+
640682 block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
641683 start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
642684
643
- /* Copy the block to local buffer to avoid HW override while being processed*/
685
+ /* Copy the block to local buffer to avoid HW override while being processed */
644686 memcpy(tmp_trace_block, tracer->buff.log_buf + start_offset,
645687 TRACER_BLOCK_SIZE_BYTE);
646688
....@@ -648,7 +690,7 @@
648690 get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
649691
650692 while (block_timestamp > tracer->last_timestamp) {
651
- /* Check block override if its not the first block */
693
+ /* Check block override if it's not the first block */
652694 if (!tracer->last_timestamp) {
653695 u64 *ts_event;
654696 /* To avoid block override by the HW in case of buffer
....@@ -694,6 +736,7 @@
694736 &tmp_trace_block[TRACES_PER_BLOCK - 1]);
695737 }
696738
739
+arm:
697740 mlx5_fw_tracer_arm(dev);
698741 }
699742
....@@ -714,6 +757,7 @@
714757 if (err)
715758 mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err);
716759
760
+ tracer->buff.consumer_index = 0;
717761 return err;
718762 }
719763
....@@ -778,11 +822,113 @@
778822 mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner);
779823 if (tracer->owner) {
780824 tracer->owner = false;
781
- tracer->buff.consumer_index = 0;
782825 return;
783826 }
784827
785828 mlx5_fw_tracer_start(tracer);
829
+}
830
+
831
+static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev,
832
+ u32 *in, int size_in)
833
+{
834
+ u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {};
835
+
836
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) &&
837
+ !MLX5_CAP_DEBUG(dev, core_dump_qp))
838
+ return -EOPNOTSUPP;
839
+
840
+ return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out),
841
+ MLX5_REG_CORE_DUMP, 0, 1);
842
+}
843
+
844
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
845
+{
846
+ struct mlx5_fw_tracer *tracer = dev->tracer;
847
+ u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {};
848
+ int err;
849
+
850
+ if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer)
851
+ return -EOPNOTSUPP;
852
+ if (!tracer->owner)
853
+ return -EPERM;
854
+
855
+ MLX5_SET(core_dump_reg, in, core_dump_type, 0x0);
856
+
857
+ err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in));
858
+ if (err)
859
+ return err;
860
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
861
+ flush_workqueue(tracer->work_queue);
862
+ return 0;
863
+}
864
+
865
+static int
866
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
867
+ struct mlx5_fw_trace_data *trace_data)
868
+{
869
+ int err;
870
+
871
+ err = devlink_fmsg_obj_nest_start(fmsg);
872
+ if (err)
873
+ return err;
874
+
875
+ err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp);
876
+ if (err)
877
+ return err;
878
+
879
+ err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost);
880
+ if (err)
881
+ return err;
882
+
883
+ err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id);
884
+ if (err)
885
+ return err;
886
+
887
+ err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg);
888
+ if (err)
889
+ return err;
890
+
891
+ err = devlink_fmsg_obj_nest_end(fmsg);
892
+ if (err)
893
+ return err;
894
+ return 0;
895
+}
896
+
897
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
898
+ struct devlink_fmsg *fmsg)
899
+{
900
+ struct mlx5_fw_trace_data *straces = tracer->st_arr.straces;
901
+ u32 index, start_index, end_index;
902
+ u32 saved_traces_index;
903
+ int err;
904
+
905
+ if (!straces[0].timestamp)
906
+ return -ENOMSG;
907
+
908
+ mutex_lock(&tracer->st_arr.lock);
909
+ saved_traces_index = tracer->st_arr.saved_traces_index;
910
+ if (straces[saved_traces_index].timestamp)
911
+ start_index = saved_traces_index;
912
+ else
913
+ start_index = 0;
914
+ end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1);
915
+
916
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces");
917
+ if (err)
918
+ goto unlock;
919
+ index = start_index;
920
+ while (index != end_index) {
921
+ err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]);
922
+ if (err)
923
+ goto unlock;
924
+
925
+ index = (index + 1) & (SAVED_TRACES_NUM - 1);
926
+ }
927
+
928
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
929
+unlock:
930
+ mutex_unlock(&tracer->st_arr.lock);
931
+ return err;
786932 }
787933
788934 /* Create software resources (Buffers, etc ..) */
....@@ -832,6 +978,7 @@
832978 goto free_log_buf;
833979 }
834980
981
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
835982 mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
836983
837984 return tracer;
....@@ -846,9 +993,9 @@
846993 return ERR_PTR(err);
847994 }
848995
849
-/* Create HW resources + start tracer
850
- * must be called before Async EQ is created
851
- */
996
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data);
997
+
998
+/* Create HW resources + start tracer */
852999 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
8531000 {
8541001 struct mlx5_core_dev *dev;
....@@ -865,7 +1012,7 @@
8651012 err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn);
8661013 if (err) {
8671014 mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err);
868
- return err;
1015
+ goto err_cancel_work;
8691016 }
8701017
8711018 err = mlx5_fw_tracer_create_mkey(tracer);
....@@ -874,18 +1021,27 @@
8741021 goto err_dealloc_pd;
8751022 }
8761023
877
- mlx5_fw_tracer_start(tracer);
1024
+ MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER);
1025
+ mlx5_eq_notifier_register(dev, &tracer->nb);
8781026
1027
+ err = mlx5_fw_tracer_start(tracer);
1028
+ if (err) {
1029
+ mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err);
1030
+ goto err_notifier_unregister;
1031
+ }
8791032 return 0;
8801033
1034
+err_notifier_unregister:
1035
+ mlx5_eq_notifier_unregister(dev, &tracer->nb);
1036
+ mlx5_core_destroy_mkey(dev, &tracer->buff.mkey);
8811037 err_dealloc_pd:
8821038 mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
1039
+err_cancel_work:
1040
+ cancel_work_sync(&tracer->read_fw_strings_work);
8831041 return err;
8841042 }
8851043
886
-/* Stop tracer + Cleanup HW resources
887
- * must be called after Async EQ is destroyed
888
- */
1044
+/* Stop tracer + Cleanup HW resources */
8891045 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
8901046 {
8911047 if (IS_ERR_OR_NULL(tracer))
....@@ -893,7 +1049,7 @@
8931049
8941050 mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n",
8951051 tracer->owner);
896
-
1052
+ mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb);
8971053 cancel_work_sync(&tracer->ownership_change_work);
8981054 cancel_work_sync(&tracer->handle_traces_work);
8991055
....@@ -915,6 +1071,7 @@
9151071 cancel_work_sync(&tracer->read_fw_strings_work);
9161072 mlx5_fw_tracer_clean_ready_list(tracer);
9171073 mlx5_fw_tracer_clean_print_hash(tracer);
1074
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
9181075 mlx5_fw_tracer_free_strings_db(tracer);
9191076 mlx5_fw_tracer_destroy_log_buf(tracer);
9201077 flush_workqueue(tracer->work_queue);
....@@ -922,12 +1079,63 @@
9221079 kvfree(tracer);
9231080 }
9241081
925
-void mlx5_fw_tracer_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
1082
+static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer)
9261083 {
927
- struct mlx5_fw_tracer *tracer = dev->tracer;
1084
+ struct mlx5_core_dev *dev;
1085
+ int err;
9281086
929
- if (!tracer)
930
- return;
1087
+ cancel_work_sync(&tracer->read_fw_strings_work);
1088
+ mlx5_fw_tracer_clean_ready_list(tracer);
1089
+ mlx5_fw_tracer_clean_print_hash(tracer);
1090
+ mlx5_fw_tracer_clean_saved_traces_array(tracer);
1091
+ mlx5_fw_tracer_free_strings_db(tracer);
1092
+
1093
+ dev = tracer->dev;
1094
+ err = mlx5_query_mtrc_caps(tracer);
1095
+ if (err) {
1096
+ mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err);
1097
+ return err;
1098
+ }
1099
+
1100
+ err = mlx5_fw_tracer_allocate_strings_db(tracer);
1101
+ if (err) {
1102
+ mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err);
1103
+ return err;
1104
+ }
1105
+ mlx5_fw_tracer_init_saved_traces_array(tracer);
1106
+
1107
+ return 0;
1108
+}
1109
+
1110
+int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer)
1111
+{
1112
+ struct mlx5_core_dev *dev;
1113
+ int err;
1114
+
1115
+ if (IS_ERR_OR_NULL(tracer))
1116
+ return -EINVAL;
1117
+
1118
+ dev = tracer->dev;
1119
+ mlx5_fw_tracer_cleanup(tracer);
1120
+ err = mlx5_fw_tracer_recreate_strings_db(tracer);
1121
+ if (err) {
1122
+ mlx5_core_warn(dev, "Failed to recreate FW tracer strings DB\n");
1123
+ return err;
1124
+ }
1125
+ err = mlx5_fw_tracer_init(tracer);
1126
+ if (err) {
1127
+ mlx5_core_warn(dev, "Failed to re-initialize FW tracer\n");
1128
+ return err;
1129
+ }
1130
+
1131
+ return 0;
1132
+}
1133
+
1134
+static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void *data)
1135
+{
1136
+ struct mlx5_fw_tracer *tracer = mlx5_nb_cof(nb, struct mlx5_fw_tracer, nb);
1137
+ struct mlx5_core_dev *dev = tracer->dev;
1138
+ struct mlx5_eqe *eqe = data;
9311139
9321140 switch (eqe->sub_type) {
9331141 case MLX5_TRACER_SUBTYPE_OWNERSHIP_CHANGE:
....@@ -935,13 +1143,14 @@
9351143 queue_work(tracer->work_queue, &tracer->ownership_change_work);
9361144 break;
9371145 case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
938
- if (likely(tracer->str_db.loaded))
939
- queue_work(tracer->work_queue, &tracer->handle_traces_work);
1146
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
9401147 break;
9411148 default:
9421149 mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
9431150 eqe->sub_type);
9441151 }
1152
+
1153
+ return NOTIFY_OK;
9451154 }
9461155
9471156 EXPORT_TRACEPOINT_SYMBOL(mlx5_fw);