From f9004dbfff8a3fbbd7e2a88c8a4327c7f2f8e5b2 Mon Sep 17 00:00:00 2001
From: hc <hc@nodka.com>
Date: Wed, 31 Jan 2024 01:04:47 +0000
Subject: [PATCH] add driver 5G

---
 kernel/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c |  707 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 653 insertions(+), 54 deletions(-)

diff --git a/kernel/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/kernel/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index 6d29dc4..7ec1d0e 100644
--- a/kernel/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/kernel/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -9,17 +9,41 @@
 #include <linux/sysfs.h>
 #include <linux/thermal.h>
 #include <linux/err.h>
+#include <linux/sfp.h>
 
 #include "core.h"
+#include "core_env.h"
 
 #define MLXSW_THERMAL_POLL_INT	1000	/* ms */
-#define MLXSW_THERMAL_MAX_TEMP	110000	/* 110C */
+#define MLXSW_THERMAL_SLOW_POLL_INT	20000	/* ms */
+#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
+#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
+#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
+#define MLXSW_THERMAL_ASIC_TEMP_CRIT	140000	/* 140C */
+#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
+#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
+#define MLXSW_THERMAL_ZONE_MAX_NAME	16
+#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
 #define MLXSW_THERMAL_MAX_STATE	10
+#define MLXSW_THERMAL_MIN_STATE	2
 #define MLXSW_THERMAL_MAX_DUTY	255
+
+/* External cooling devices, allowed for binding to mlxsw thermal zones. */
+static char * const mlxsw_thermal_external_allowed_cdev[] = {
+	"mlxreg_fan",
+};
+
+enum mlxsw_thermal_trips {
+	MLXSW_THERMAL_TEMP_TRIP_NORM,
+	MLXSW_THERMAL_TEMP_TRIP_HIGH,
+	MLXSW_THERMAL_TEMP_TRIP_HOT,
+	MLXSW_THERMAL_TEMP_TRIP_CRIT,
+};
 
 struct mlxsw_thermal_trip {
 	int	type;
 	int	temp;
+	int	hyst;
 	int	min_state;
 	int	max_state;
 };
@@ -27,32 +51,29 @@
 static const struct mlxsw_thermal_trip default_thermal_trips[] = {
 	{	/* In range - 0-40% PWM */
 		.type		= THERMAL_TRIP_ACTIVE,
-		.temp		= 75000,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
+		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
 		.min_state	= 0,
 		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
 	},
-	{	/* High - 40-100% PWM */
-		.type		= THERMAL_TRIP_ACTIVE,
-		.temp		= 80000,
-		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
-		.max_state	= MLXSW_THERMAL_MAX_STATE,
-	},
 	{
-		/* Very high - 100% PWM */
+		/* In range - 40-100% PWM */
 		.type		= THERMAL_TRIP_ACTIVE,
-		.temp		= 85000,
-		.min_state	= MLXSW_THERMAL_MAX_STATE,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
+		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
+		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	},
 	{	/* Warning */
 		.type		= THERMAL_TRIP_HOT,
-		.temp		= 105000,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
+		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
 		.min_state	= MLXSW_THERMAL_MAX_STATE,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	},
 	{	/* Critical - soft poweroff */
 		.type		= THERMAL_TRIP_CRITICAL,
-		.temp		= MLXSW_THERMAL_MAX_TEMP,
+		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
 		.min_state	= MLXSW_THERMAL_MAX_STATE,
 		.max_state	= MLXSW_THERMAL_MAX_STATE,
 	}
@@ -63,13 +84,29 @@
 /* Make sure all trips are writable */
 #define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
 
+struct mlxsw_thermal;
+
+struct mlxsw_thermal_module {
+	struct mlxsw_thermal *parent;
+	struct thermal_zone_device *tzdev;
+	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
+	int module; /* Module or gearbox number */
+};
+
 struct mlxsw_thermal {
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
 	struct thermal_zone_device *tzdev;
+	int polling_delay;
 	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
+	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
-	enum thermal_device_mode mode;
+	struct mlxsw_thermal_module *tz_module_arr;
+	u8 tz_module_num;
+	struct mlxsw_thermal_module *tz_gearbox_arr;
+	u8 tz_gearbox_num;
+	unsigned int tz_highest_score;
+	struct thermal_zone_device *tz_highest_dev;
 };
 
 static inline u8 mlxsw_state_to_duty(int state)
@@ -93,7 +130,96 @@
 		if (thermal->cdevs[i] == cdev)
 			return i;
 
+	/* Allow mlxsw thermal zone binding to an external cooling device */
+	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
+		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
+			    sizeof(cdev->type)))
+			return 0;
+	}
+
 	return -ENODEV;
+}
+
+static void
+mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
+{
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
+}
+
+static int
+mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
+				  struct mlxsw_thermal_module *tz)
+{
+	int crit_temp, emerg_temp;
+	int err;
+
+	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
+						   SFP_TEMP_HIGH_WARN,
+						   &crit_temp);
+	if (err)
+		return err;
+
+	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
+						   SFP_TEMP_HIGH_ALARM,
+						   &emerg_temp);
+	if (err)
+		return err;
+
+	if (crit_temp > emerg_temp) {
+		dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
+			 tz->tzdev->type, crit_temp, emerg_temp);
+		return 0;
+	}
+
+	/* According to the system thermal requirements, the thermal zones are
+	 * defined with four trip points. The critical and emergency
+	 * temperature thresholds, provided by QSFP module are set as "active"
+	 * and "hot" trip points, "normal" and "critical" trip points are
+	 * derived from "active" and "hot" by subtracting or adding double
+	 * hysteresis value.
+	 */
+	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
+		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
+					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
+	else
+		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
+	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
+					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
+
+	return 0;
+}
+
+static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
+					  struct thermal_zone_device *tzdev,
+					  struct mlxsw_thermal_trip *trips,
+					  int temp)
+{
+	struct mlxsw_thermal_trip *trip = trips;
+	unsigned int score, delta, i, shift = 1;
+
+	/* Calculate thermal zone score, if temperature is above the critical
+	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
+	 */
+	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
+	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
+	     i++, trip++) {
+		if (temp < trip->temp) {
+			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
+			score = delta * shift;
+			break;
+		}
+		shift *= 256;
+	}
+
+	if (score > thermal->tz_highest_score) {
+		thermal->tz_highest_score = score;
+		thermal->tz_highest_dev = tzdev;
+	}
 }
 
 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
@@ -144,43 +270,13 @@
 	return 0;
 }
 
-static int mlxsw_thermal_get_mode(struct thermal_zone_device *tzdev,
-				  enum thermal_device_mode *mode)
-{
-	struct mlxsw_thermal *thermal = tzdev->devdata;
-
-	*mode = thermal->mode;
-
-	return 0;
-}
-
-static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
-				  enum thermal_device_mode mode)
-{
-	struct mlxsw_thermal *thermal = tzdev->devdata;
-
-	mutex_lock(&tzdev->lock);
-
-	if (mode == THERMAL_DEVICE_ENABLED)
-		tzdev->polling_delay = MLXSW_THERMAL_POLL_INT;
-	else
-		tzdev->polling_delay = 0;
-
-	mutex_unlock(&tzdev->lock);
-
-	thermal->mode = mode;
-	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
-
-	return 0;
-}
-
 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
 				  int *p_temp)
 {
 	struct mlxsw_thermal *thermal = tzdev->devdata;
 	struct device *dev = thermal->bus_info->dev;
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
-	unsigned int temp;
+	int temp;
 	int err;
 
 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
@@ -191,8 +287,11 @@
 		return err;
 	}
 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	if (temp > 0)
+		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
+					      temp);
 
-	*p_temp = (int) temp;
+	*p_temp = temp;
 	return 0;
 }
 
@@ -227,22 +326,265 @@
 	struct mlxsw_thermal *thermal = tzdev->devdata;
 
 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
-	    temp > MLXSW_THERMAL_MAX_TEMP)
+	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
 		return -EINVAL;
 
 	thermal->trips[trip].temp = temp;
 	return 0;
 }
 
+static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
+				       int trip, int *p_hyst)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	*p_hyst = thermal->trips[trip].hyst;
+	return 0;
+}
+
+static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
+				       int trip, int hyst)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	thermal->trips[trip].hyst = hyst;
+	return 0;
+}
+
+static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
+				   int trip, enum thermal_trend *trend)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	if (tzdev == thermal->tz_highest_dev)
+		return 1;
+
+	*trend = THERMAL_TREND_STABLE;
+	return 0;
+}
+
 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
 	.bind = mlxsw_thermal_bind,
 	.unbind = mlxsw_thermal_unbind,
-	.get_mode = mlxsw_thermal_get_mode,
-	.set_mode = mlxsw_thermal_set_mode,
 	.get_temp = mlxsw_thermal_get_temp,
 	.get_trip_type	= mlxsw_thermal_get_trip_type,
 	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
 	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
+	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
+	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
+	.get_trend	= mlxsw_thermal_trend_get,
+};
+
+static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
+				     struct thermal_cooling_device *cdev)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	int i, j, err;
+
+	/* If the cooling device is one of ours bind it */
+	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+		return 0;
+
+	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
+
+		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
+						       trip->max_state,
+						       trip->min_state,
+						       THERMAL_WEIGHT_DEFAULT);
+		if (err < 0)
+			goto err_bind_cooling_device;
+	}
+	return 0;
+
+err_bind_cooling_device:
+	for (j = i - 1; j >= 0; j--)
+		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
+	return err;
+}
+
+static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
+				       struct thermal_cooling_device *cdev)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	int i;
+	int err;
+
+	/* If the cooling device is one of ours unbind it */
+	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+		return 0;
+
+	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
+		WARN_ON(err);
+	}
+	return err;
+}
+
+static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
+					 int *p_temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	struct device *dev = thermal->bus_info->dev;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	int temp;
+	int err;
+
+	/* Read module temperature. */
+	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
+			    tz->module, false, false);
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		/* Do not return error - in case of broken module's sensor
+		 * it will cause error message flooding.
+		 */
+		temp = 0;
+		*p_temp = (int) temp;
+		return 0;
+	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	*p_temp = temp;
+
+	if (!temp)
+		return 0;
+
+	/* Update trip points. */
+	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
+	if (!err && temp > 0)
+		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
+
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
+				   enum thermal_trip_type *p_type)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	*p_type = tz->trips[trip].type;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
+				   int trip, int *p_temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	*p_temp = tz->trips[trip].temp;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
+				   int trip, int temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
+	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
+		return -EINVAL;
+
+	tz->trips[trip].temp = temp;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
+				   int *p_hyst)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	*p_hyst = tz->trips[trip].hyst;
+	return 0;
+}
+
+static int
+mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
+				   int hyst)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+
+	tz->trips[trip].hyst = hyst;
+	return 0;
+}
+
+static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev,
+					  int trip, enum thermal_trend *trend)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	if (tzdev == thermal->tz_highest_dev)
+		return 1;
+
+	*trend = THERMAL_TREND_STABLE;
+	return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
+	.bind		= mlxsw_thermal_module_bind,
+	.unbind		= mlxsw_thermal_module_unbind,
+	.get_temp	= mlxsw_thermal_module_temp_get,
+	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
+	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
+	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
+	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
+	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
+	.get_trend	= mlxsw_thermal_module_trend_get,
+};
+
+static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
+					  int *p_temp)
+{
+	struct mlxsw_thermal_module *tz = tzdev->devdata;
+	struct mlxsw_thermal *thermal = tz->parent;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	u16 index;
+	int temp;
+	int err;
+
+	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
+	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
+
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+	if (temp > 0)
+		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
+
+	*p_temp = temp;
+	return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
+	.bind		= mlxsw_thermal_module_bind,
+	.unbind		= mlxsw_thermal_module_unbind,
+	.get_temp	= mlxsw_thermal_gearbox_temp_get,
+	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
+	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
+	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
+	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
+	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
+	.get_trend	= mlxsw_thermal_module_trend_get,
 };
 
 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
@@ -285,12 +627,18 @@
 	struct mlxsw_thermal *thermal = cdev->devdata;
 	struct device *dev = thermal->bus_info->dev;
 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
-	int err, idx;
+	int idx;
+	int err;
+
+	if (state > MLXSW_THERMAL_MAX_STATE)
+		return -EINVAL;
 
 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
 	if (idx < 0)
 		return idx;
 
+	/* Normalize the state to the valid speed range. */
+	state = thermal->cooling_levels[state];
 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
 	if (err) {
@@ -305,6 +653,225 @@
 	.get_cur_state	= mlxsw_thermal_get_cur_state,
 	.set_cur_state	= mlxsw_thermal_set_cur_state,
 };
+
+static int
+mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
+{
+	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
+	int err;
+
+	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
+		 module_tz->module + 1);
+	module_tz->tzdev = thermal_zone_device_register(tz_name,
+							MLXSW_THERMAL_NUM_TRIPS,
+							MLXSW_THERMAL_TRIP_MASK,
+							module_tz,
+							&mlxsw_thermal_module_ops,
+							NULL, 0,
+							module_tz->parent->polling_delay);
+	if (IS_ERR(module_tz->tzdev)) {
+		err = PTR_ERR(module_tz->tzdev);
+		return err;
+	}
+
+	err = thermal_zone_device_enable(module_tz->tzdev);
+	if (err)
+		thermal_zone_device_unregister(module_tz->tzdev);
+
+	return err;
+}
+
+static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
+{
+	thermal_zone_device_unregister(tzdev);
+}
+
+static int
+mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
+			  struct mlxsw_thermal *thermal, u8 module)
+{
+	struct mlxsw_thermal_module *module_tz;
+
+	module_tz = &thermal->tz_module_arr[module];
+	/* Skip if parent is already set (case of port split). */
+	if (module_tz->parent)
+		return 0;
+	module_tz->module = module;
+	module_tz->parent = thermal;
+	memcpy(module_tz->trips, default_thermal_trips,
+	       sizeof(thermal->trips));
+	/* Initialize all trip point. */
+	mlxsw_thermal_module_trips_reset(module_tz);
+	/* Update trip point according to the module data. */
+	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
+}
+
+static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
+{
+	if (module_tz && module_tz->tzdev) {
+		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
+		module_tz->tzdev = NULL;
+		module_tz->parent = NULL;
+	}
+}
+
+static int
+mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
+			   struct mlxsw_thermal *thermal)
+{
+	struct mlxsw_thermal_module *module_tz;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	int i, err;
+
+	if (!mlxsw_core_res_query_enabled(core))
+		return 0;
+
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
+			       &thermal->tz_module_num);
+
+	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
+					 sizeof(*thermal->tz_module_arr),
+					 GFP_KERNEL);
+	if (!thermal->tz_module_arr)
+		return -ENOMEM;
+
+	for (i = 0; i < thermal->tz_module_num; i++) {
+		err = mlxsw_thermal_module_init(dev, core, thermal, i);
+		if (err)
+			goto err_unreg_tz_module_arr;
+	}
+
+	for (i = 0; i < thermal->tz_module_num; i++) {
+		module_tz = &thermal->tz_module_arr[i];
+		if (!module_tz->parent)
+			continue;
+		err = mlxsw_thermal_module_tz_init(module_tz);
+		if (err)
+			goto err_unreg_tz_module_arr;
+	}
+
+	return 0;
+
+err_unreg_tz_module_arr:
+	for (i = thermal->tz_module_num - 1; i >= 0; i--)
+		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
+	kfree(thermal->tz_module_arr);
+	return err;
+}
+
+static void
+mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
+{
+	int i;
+
+	if (!mlxsw_core_res_query_enabled(thermal->core))
+		return;
+
+	for (i = thermal->tz_module_num - 1; i >= 0; i--)
+		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
+	kfree(thermal->tz_module_arr);
+}
+
+static int
+mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
+{
+	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
+	int ret;
+
+	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
+		 gearbox_tz->module + 1);
+	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
+						MLXSW_THERMAL_NUM_TRIPS,
+						MLXSW_THERMAL_TRIP_MASK,
+						gearbox_tz,
+						&mlxsw_thermal_gearbox_ops,
+						NULL, 0,
+						gearbox_tz->parent->polling_delay);
+	if (IS_ERR(gearbox_tz->tzdev))
+		return PTR_ERR(gearbox_tz->tzdev);
+
+	ret = thermal_zone_device_enable(gearbox_tz->tzdev);
+	if (ret)
+		thermal_zone_device_unregister(gearbox_tz->tzdev);
+
+	return ret;
+}
+
+static void
+mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
+{
+	thermal_zone_device_unregister(gearbox_tz->tzdev);
+}
+
+static int
+mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
+			     struct mlxsw_thermal *thermal)
+{
+	enum mlxsw_reg_mgpir_device_type device_type;
+	struct mlxsw_thermal_module *gearbox_tz;
+	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+	u8 gbox_num;
+	int i;
+	int err;
+
+	if (!mlxsw_core_res_query_enabled(core))
+		return 0;
+
+	mlxsw_reg_mgpir_pack(mgpir_pl);
+	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
+			       NULL);
+	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
+	    !gbox_num)
+		return 0;
+
+	thermal->tz_gearbox_num = gbox_num;
+	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
+					  sizeof(*thermal->tz_gearbox_arr),
+					  GFP_KERNEL);
+	if (!thermal->tz_gearbox_arr)
+		return -ENOMEM;
+
+	for (i = 0; i < thermal->tz_gearbox_num; i++) {
+		gearbox_tz = &thermal->tz_gearbox_arr[i];
+		memcpy(gearbox_tz->trips, default_thermal_trips,
+		       sizeof(thermal->trips));
+		gearbox_tz->module = i;
+		gearbox_tz->parent = thermal;
+		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
+		if (err)
+			goto err_unreg_tz_gearbox;
+	}
+
+	return 0;
+
+err_unreg_tz_gearbox:
+	for (i--; i >= 0; i--)
+		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
+	kfree(thermal->tz_gearbox_arr);
+	return err;
+}
+
+static void
+mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
+{
+	int i;
+
+	if (!mlxsw_core_res_query_enabled(thermal->core))
+		return;
+
+	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
+		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
+	kfree(thermal->tz_gearbox_arr);
+}
 
 int mlxsw_thermal_init(struct mlxsw_core *core,
 		       const struct mlxsw_bus_info *bus_info,
@@ -358,8 +925,9 @@
 		if (pwm_active & BIT(i)) {
 			struct thermal_cooling_device *cdev;
 
-			cdev = thermal_cooling_device_register("Fan", thermal,
-							&mlxsw_cooling_ops);
+			cdev = thermal_cooling_device_register("mlxsw_fan",
+							       thermal,
+							       &mlxsw_cooling_ops);
 			if (IS_ERR(cdev)) {
 				err = PTR_ERR(cdev);
 				dev_err(dev, "Failed to register cooling device\n");
@@ -369,22 +937,51 @@
 		}
 	}
 
+	/* Initialize cooling levels per PWM state. */
+	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
+		thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i);
+
+	thermal->polling_delay = bus_info->low_frequency ?
+				 MLXSW_THERMAL_SLOW_POLL_INT :
+				 MLXSW_THERMAL_POLL_INT;
+
 	thermal->tzdev = thermal_zone_device_register("mlxsw",
 						      MLXSW_THERMAL_NUM_TRIPS,
 						      MLXSW_THERMAL_TRIP_MASK,
 						      thermal,
 						      &mlxsw_thermal_ops,
 						      NULL, 0,
-						      MLXSW_THERMAL_POLL_INT);
+						      thermal->polling_delay);
 	if (IS_ERR(thermal->tzdev)) {
 		err = PTR_ERR(thermal->tzdev);
 		dev_err(dev, "Failed to register thermal zone\n");
 		goto err_unreg_cdevs;
 	}
 
-	thermal->mode = THERMAL_DEVICE_ENABLED;
+	err = mlxsw_thermal_modules_init(dev, core, thermal);
+	if (err)
+		goto err_unreg_tzdev;
+
+	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
+	if (err)
+		goto err_unreg_modules_tzdev;
+
+	err = thermal_zone_device_enable(thermal->tzdev);
+	if (err)
+		goto err_unreg_gearboxes;
+
 	*p_thermal = thermal;
 	return 0;
+
+err_unreg_gearboxes:
+	mlxsw_thermal_gearboxes_fini(thermal);
+err_unreg_modules_tzdev:
+	mlxsw_thermal_modules_fini(thermal);
+err_unreg_tzdev:
+	if (thermal->tzdev) {
+		thermal_zone_device_unregister(thermal->tzdev);
+		thermal->tzdev = NULL;
+	}
 err_unreg_cdevs:
 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
 		if (thermal->cdevs[i])
@@ -398,6 +995,8 @@
 {
 	int i;
 
+	mlxsw_thermal_gearboxes_fini(thermal);
+	mlxsw_thermal_modules_fini(thermal);
 	if (thermal->tzdev) {
 		thermal_zone_device_unregister(thermal->tzdev);
 		thermal->tzdev = NULL;

--
Gitblit v1.6.2