2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/scsi/storvsc_drv.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2009, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
  *
  * Authors:
  *   Haiyang Zhang <haiyangz@microsoft.com>
@@ -368,16 +356,21 @@
 };
 
 /*
- * SRB status codes and masks; a subset of the codes used here.
+ * SRB status codes and masks. In the 8-bit field, the two high order bits
+ * are flags, while the remaining 6 bits are an integer status code. The
+ * definitions here include only the subset of the integer status codes that
+ * are tested for in this driver.
  */
-
 #define SRB_STATUS_AUTOSENSE_VALID	0x80
 #define SRB_STATUS_QUEUE_FROZEN		0x40
-#define SRB_STATUS_INVALID_LUN		0x20
-#define SRB_STATUS_SUCCESS		0x01
-#define SRB_STATUS_ABORTED		0x02
-#define SRB_STATUS_ERROR		0x04
-#define SRB_STATUS_DATA_OVERRUN		0x12
+
+/* SRB status integer codes */
+#define SRB_STATUS_SUCCESS		0x01
+#define SRB_STATUS_ABORTED		0x02
+#define SRB_STATUS_ERROR		0x04
+#define SRB_STATUS_INVALID_REQUEST	0x06
+#define SRB_STATUS_DATA_OVERRUN		0x12
+#define SRB_STATUS_INVALID_LUN		0x20
 
 #define SRB_STATUS(status) \
 	(status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
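
The SRB_STATUS() macro strips the two flag bits so the switch in storvsc_handle_error() can compare the remaining 6-bit integer code directly. A minimal standalone illustration of the decoding (not driver code; the constants are copied from the defines above):

#include <assert.h>

#define SRB_STATUS_AUTOSENSE_VALID	0x80
#define SRB_STATUS_QUEUE_FROZEN		0x40
#define SRB_STATUS_ERROR		0x04
#define SRB_STATUS(status) \
	(status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))

int main(void)
{
	/* 0x84 = AUTOSENSE_VALID flag (0x80) | ERROR code (0x04). */
	unsigned char srb_status = 0x84;

	assert(SRB_STATUS(srb_status) == SRB_STATUS_ERROR);
	assert(srb_status & SRB_STATUS_AUTOSENSE_VALID);
	return 0;
}
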
@@ -385,8 +378,9 @@
  * This is the end of Protocol specific defines.
  */
 
-static int storvsc_ringbuffer_size = (256 * PAGE_SIZE);
+static int storvsc_ringbuffer_size = (128 * 1024);
 static u32 max_outstanding_req_per_channel;
+static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);
 
 static int storvsc_vcpus_per_sub_channel = 4;
 
@@ -446,7 +440,6 @@
 
 	bool	destroy;
 	bool	drain_notify;
-	bool	open_sub_channel;
 	atomic_t num_outstanding_req;
 	struct Scsi_Host *host;
 
@@ -474,6 +467,11 @@
 	 * Mask of CPUs bound to subchannels.
 	 */
 	struct cpumask alloced_cpus;
+	/*
+	 * Serializes modifications of stor_chns[] from storvsc_do_io()
+	 * and storvsc_change_target_cpu().
+	 */
+	spinlock_t lock;
 	/* Used for vsc/vsp channel reset process */
 	struct storvsc_cmd_request init_request;
 	struct storvsc_cmd_request reset_request;
@@ -633,36 +631,101 @@
 
 }
 
+static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
+				      u32 new)
+{
+	struct storvsc_device *stor_device;
+	struct vmbus_channel *cur_chn;
+	bool old_is_alloced = false;
+	struct hv_device *device;
+	unsigned long flags;
+	int cpu;
+
+	device = channel->primary_channel ?
+			channel->primary_channel->device_obj
+				: channel->device_obj;
+	stor_device = get_out_stor_device(device);
+	if (!stor_device)
+		return;
+
+	/* See storvsc_do_io() -> get_og_chn(). */
+	spin_lock_irqsave(&stor_device->lock, flags);
+
+	/*
+	 * Determines if the storvsc device has other channels assigned to
+	 * the "old" CPU to update the alloced_cpus mask and the stor_chns
+	 * array.
+	 */
+	if (device->channel != channel && device->channel->target_cpu == old) {
+		cur_chn = device->channel;
+		old_is_alloced = true;
+		goto old_is_alloced;
+	}
+	list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
+		if (cur_chn == channel)
+			continue;
+		if (cur_chn->target_cpu == old) {
+			old_is_alloced = true;
+			goto old_is_alloced;
+		}
+	}
+
+old_is_alloced:
+	if (old_is_alloced)
+		WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
+	else
+		cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+	/* "Flush" the stor_chns array. */
+	for_each_possible_cpu(cpu) {
+		if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
+					cpu, &stor_device->alloced_cpus))
+			WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+	}
+
+	WRITE_ONCE(stor_device->stor_chns[new], channel);
+	cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+	spin_unlock_irqrestore(&stor_device->lock, flags);
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
 	struct hv_device *device = new_sc->primary_channel->device_obj;
+	struct device *dev = &device->device;
 	struct storvsc_device *stor_device;
 	struct vmstorage_channel_properties props;
+	int ret;
 
 	stor_device = get_out_stor_device(device);
 	if (!stor_device)
 		return;
 
-	if (stor_device->open_sub_channel == false)
-		return;
-
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
-	vmbus_open(new_sc,
-		   storvsc_ringbuffer_size,
-		   storvsc_ringbuffer_size,
-		   (void *)&props,
-		   sizeof(struct vmstorage_channel_properties),
-		   storvsc_on_channel_callback, new_sc);
+	ret = vmbus_open(new_sc,
+			 storvsc_ringbuffer_size,
			 storvsc_ringbuffer_size,
+			 (void *)&props,
+			 sizeof(struct vmstorage_channel_properties),
+			 storvsc_on_channel_callback, new_sc);
 
-	if (new_sc->state == CHANNEL_OPENED_STATE) {
-		stor_device->stor_chns[new_sc->target_cpu] = new_sc;
-		cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+	/* In case vmbus_open() fails, we don't use the sub-channel. */
+	if (ret != 0) {
+		dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
+		return;
 	}
+
+	new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
+	/* Add the sub-channel to the array of available channels. */
+	stor_device->stor_chns[new_sc->target_cpu] = new_sc;
+	cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
 }
 
 static void handle_multichannel_storage(struct hv_device *device, int max_chns)
 {
+	struct device *dev = &device->device;
 	struct storvsc_device *stor_device;
 	int num_sc;
 	struct storvsc_cmd_request *request;
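
Note on the locking scheme introduced here: stor_chns[] is modified only with stor_device->lock held and published with WRITE_ONCE(), while the hot path in storvsc_do_io() reads the array locklessly with READ_ONCE(). A hypothetical miniature of the pattern (chn_map and its fields are illustrative, not the driver's types):

#include <linux/spinlock.h>
#include <linux/threads.h>

struct chn_map {
	spinlock_t lock;
	void *chns[NR_CPUS];
};

/* Writer: serialized by the lock, publishes the pointer atomically. */
static void map_set(struct chn_map *m, int cpu, void *chan)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	WRITE_ONCE(m->chns[cpu], chan);
	spin_unlock_irqrestore(&m->lock, flags);
}

/* Reader: lockless; may see the old or the new pointer, never a torn one. */
static void *map_get(struct chn_map *m, int cpu)
{
	return READ_ONCE(m->chns[cpu]);
}
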
@@ -688,21 +751,11 @@
 	request = &stor_device->init_request;
 	vstor_packet = &request->vstor_packet;
 
-	stor_device->open_sub_channel = true;
 	/*
 	 * Establish a handler for dealing with subchannels.
 	 */
 	vmbus_set_sc_create_callback(device->channel, handle_sc_creation);
 
-	/*
-	 * Check to see if sub-channels have already been created. This
-	 * can happen when this driver is re-loaded after unloading.
-	 */
-
-	if (vmbus_are_subchannels_present(device->channel))
-		return;
-
-	stor_device->open_sub_channel = false;
 	/*
 	 * Request the host to create sub-channels.
 	 */
@@ -719,23 +772,29 @@
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0)
+	if (ret != 0) {
+		dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
 		return;
+	}
 
 	t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
-	if (t == 0)
+	if (t == 0) {
+		dev_err(dev, "Failed to create sub-channel: timed out\n");
 		return;
+	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
-	    vstor_packet->status != 0)
+	    vstor_packet->status != 0) {
+		dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
+			vstor_packet->operation, vstor_packet->status);
 		return;
+	}
 
 	/*
-	 * Now that we created the sub-channels, invoke the check; this
-	 * may trigger the callback.
+	 * We need to do nothing here, because vmbus_process_offer()
+	 * invokes channel->sc_creation_callback, which will open and use
+	 * the sub-channel(s).
 	 */
-	stor_device->open_sub_channel = true;
-	vmbus_are_subchannels_present(device->channel);
 }
 
 static void cache_wwn(struct storvsc_device *stor_device,
@@ -887,6 +946,8 @@
 	if (stor_device->stor_chns == NULL)
 		return -ENOMEM;
 
+	device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
 	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
 	cpumask_set_cpu(device->channel->target_cpu,
 			&stor_device->alloced_cpus);
@@ -938,17 +999,43 @@
 	struct storvsc_scan_work *wrk;
 	void (*process_err_fn)(struct work_struct *work);
 	struct hv_host_device *host_dev = shost_priv(host);
-	bool do_work = false;
 
 	switch (SRB_STATUS(vm_srb->srb_status)) {
 	case SRB_STATUS_ERROR:
-		/*
-		 * Let upper layer deal with error when
-		 * sense message is present.
-		 */
+	case SRB_STATUS_ABORTED:
+	case SRB_STATUS_INVALID_REQUEST:
+		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
+			/* Check for capacity change */
+			if ((asc == 0x2a) && (ascq == 0x9)) {
+				process_err_fn = storvsc_device_scan;
+				/* Retry the I/O that triggered this. */
+				set_host_byte(scmnd, DID_REQUEUE);
+				goto do_work;
+			}
 
-		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
-			break;
+			/*
+			 * Check for "Operating parameters have changed"
+			 * due to Hyper-V changing the VHD/VHDX BlockSize
+			 * when adding/removing a differencing disk. This
+			 * causes discard_granularity to change, so do a
+			 * rescan to pick up the new granularity. We don't
+			 * want scsi_report_sense() to output a message
+			 * that a sysadmin wouldn't know what to do with.
+			 */
+			if ((asc == 0x3f) && (ascq != 0x03) &&
+					(ascq != 0x0e)) {
+				process_err_fn = storvsc_device_scan;
+				set_host_byte(scmnd, DID_REQUEUE);
+				goto do_work;
+			}
+
+			/*
+			 * Otherwise, let the upper layer deal with the
+			 * error when a sense message is present.
+			 */
+			return;
+		}
+
 		/*
 		 * If there is an error; offline the device since all
 		 * error recovery strategies would have already been
@@ -961,37 +1048,26 @@
 			set_host_byte(scmnd, DID_PASSTHROUGH);
 			break;
 		/*
-		 * On Some Windows hosts TEST_UNIT_READY command can return
-		 * SRB_STATUS_ERROR, let the upper level code deal with it
-		 * based on the sense information.
+		 * On some Hyper-V hosts TEST_UNIT_READY command can
+		 * return SRB_STATUS_ERROR. Let the upper level code
+		 * deal with it based on the sense information.
 		 */
 		case TEST_UNIT_READY:
 			break;
 		default:
 			set_host_byte(scmnd, DID_ERROR);
 		}
-		break;
-	case SRB_STATUS_INVALID_LUN:
-		set_host_byte(scmnd, DID_NO_CONNECT);
-		do_work = true;
-		process_err_fn = storvsc_remove_lun;
-		break;
-	case SRB_STATUS_ABORTED:
-		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
-		    (asc == 0x2a) && (ascq == 0x9)) {
-			do_work = true;
-			process_err_fn = storvsc_device_scan;
-			/*
-			 * Retry the I/O that trigerred this.
-			 */
-			set_host_byte(scmnd, DID_REQUEUE);
-		}
-		break;
-	}
-
-	if (!do_work)
 		return;
 
+	case SRB_STATUS_INVALID_LUN:
+		set_host_byte(scmnd, DID_NO_CONNECT);
+		process_err_fn = storvsc_remove_lun;
+		goto do_work;
+
+	}
+	return;
+
+do_work:
 	/*
 	 * We need to schedule work to process this error; schedule it.
 	 */
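
The asc/ascq pair tested above is taken from the sense data returned with the failed command. As a hedged sketch of where such values come from (scsi_normalize_sense() and struct scsi_sense_hdr are the in-kernel API; the helper name is illustrative):

#include <scsi/scsi_common.h>

/* Illustrative: true for the "capacity data has changed" sense (0x2a/0x09). */
static bool sense_is_capacity_change(const u8 *sense, int len)
{
	struct scsi_sense_hdr sshdr;

	if (!scsi_normalize_sense(sense, len, &sshdr))
		return false;

	return sshdr.asc == 0x2a && sshdr.ascq == 0x09;
}
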
@@ -1049,6 +1125,10 @@
 		data_transfer_length = 0;
 	}
 
+	/* Validate data_transfer_length (from Hyper-V) */
+	if (data_transfer_length > cmd_request->payload->range.len)
+		data_transfer_length = cmd_request->payload->range.len;
+
 	scsi_set_resid(scmnd,
 		cmd_request->payload->range.len - data_transfer_length);
 
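
The clamp matters because the residual below is computed with unsigned arithmetic: a host-reported transfer length larger than the request would make the subtraction wrap instead of going negative. A standalone illustration with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned int len  = 4096;	/* bytes requested (payload->range.len) */
	unsigned int xfer = 8192;	/* bogus transfer length from the host  */

	if (xfer > len)			/* the validation added above */
		xfer = len;

	printf("resid = %u\n", len - xfer);	/* 0, not a huge wrapped value */
	return 0;
}
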
@@ -1089,6 +1169,11 @@
 	/* Copy over the status...etc */
 	stor_pkt->vm_srb.scsi_status = vstor_packet->vm_srb.scsi_status;
 	stor_pkt->vm_srb.srb_status = vstor_packet->vm_srb.srb_status;
+
+	/* Validate sense_info_length (from Hyper-V) */
+	if (vstor_packet->vm_srb.sense_info_length > sense_buffer_size)
+		vstor_packet->vm_srb.sense_info_length = sense_buffer_size;
+
 	stor_pkt->vm_srb.sense_info_length =
 		vstor_packet->vm_srb.sense_info_length;
 
@@ -1259,8 +1344,10 @@
 	const struct cpumask *node_mask;
 	int num_channels, tgt_cpu;
 
-	if (stor_device->num_sc == 0)
+	if (stor_device->num_sc == 0) {
+		stor_device->stor_chns[q_num] = stor_device->device->channel;
 		return stor_device->device->channel;
+	}
 
 	/*
 	 * Our channel array is sparsley populated and we
@@ -1269,7 +1356,6 @@
 	 * The strategy is simple:
 	 * I. Ensure NUMA locality
 	 * II. Distribute evenly (best effort)
-	 * III. Mapping is persistent.
 	 */
 
 	node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1279,8 +1365,10 @@
 		if (cpumask_test_cpu(tgt_cpu, node_mask))
 			num_channels++;
 	}
-	if (num_channels == 0)
+	if (num_channels == 0) {
+		stor_device->stor_chns[q_num] = stor_device->device->channel;
 		return stor_device->device->channel;
+	}
 
 	hash_qnum = q_num;
 	while (hash_qnum >= num_channels)
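
The loop that follows reduces hash_qnum into the range [0, num_channels) by repeated subtraction, i.e. a modulo. A standalone check with sample values:

#include <assert.h>

int main(void)
{
	unsigned int q_num = 11, num_channels = 4;
	unsigned int hash_qnum = q_num;

	/* Same reduction as in get_og_chn(). */
	while (hash_qnum >= num_channels)
		hash_qnum -= num_channels;

	assert(hash_qnum == q_num % num_channels);	/* 11 % 4 == 3 */
	return 0;
}
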
@@ -1306,6 +1394,7 @@
 	struct storvsc_device *stor_device;
 	struct vstor_packet *vstor_packet;
 	struct vmbus_channel *outgoing_channel, *channel;
+	unsigned long flags;
 	int ret = 0;
 	const struct cpumask *node_mask;
 	int tgt_cpu;
@@ -1319,10 +1408,11 @@
 
 	request->device = device;
 	/*
-	 * Select an an appropriate channel to send the request out.
+	 * Select an appropriate channel to send the request out.
 	 */
-	if (stor_device->stor_chns[q_num] != NULL) {
-		outgoing_channel = stor_device->stor_chns[q_num];
+	/* See storvsc_change_target_cpu(). */
+	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+	if (outgoing_channel != NULL) {
 		if (outgoing_channel->target_cpu == q_num) {
 			/*
 			 * Ideally, we want to pick a different channel if
@@ -1335,7 +1425,10 @@
 					continue;
 				if (tgt_cpu == q_num)
 					continue;
-				channel = stor_device->stor_chns[tgt_cpu];
+				channel = READ_ONCE(
+					stor_device->stor_chns[tgt_cpu]);
+				if (channel == NULL)
+					continue;
 				if (hv_get_avail_to_write_percent(
 							&channel->outbound)
 						> ring_avail_percent_lowater) {
@@ -1361,7 +1454,10 @@
 			for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
 				if (cpumask_test_cpu(tgt_cpu, node_mask))
 					continue;
-				channel = stor_device->stor_chns[tgt_cpu];
+				channel = READ_ONCE(
+					stor_device->stor_chns[tgt_cpu]);
+				if (channel == NULL)
+					continue;
 				if (hv_get_avail_to_write_percent(
 							&channel->outbound)
 						> ring_avail_percent_lowater) {
@@ -1371,7 +1467,14 @@
 			}
 		}
 	} else {
+		spin_lock_irqsave(&stor_device->lock, flags);
+		outgoing_channel = stor_device->stor_chns[q_num];
+		if (outgoing_channel != NULL) {
+			spin_unlock_irqrestore(&stor_device->lock, flags);
+			goto found_channel;
+		}
 		outgoing_channel = get_og_chn(stor_device, q_num);
+		spin_unlock_irqrestore(&stor_device->lock, flags);
 	}
 
 found_channel:
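
The else branch above is a double-checked lookup: a miss on the lockless READ_ONCE() is re-tested under stor_device->lock before get_og_chn() is allowed to repopulate the slot, so two CPUs racing on the same empty entry cannot both rebuild the mapping. A hypothetical miniature, reusing the chn_map sketch from earlier (build_mapping() stands in for get_og_chn()):

static void *map_get_or_build(struct chn_map *m, int q)
{
	unsigned long flags;
	void *chan = READ_ONCE(m->chns[q]);	/* lockless fast path */

	if (chan)
		return chan;

	spin_lock_irqsave(&m->lock, flags);
	chan = m->chns[q];			/* re-check under the lock */
	if (!chan)
		chan = build_mapping(m, q);	/* get_og_chn() analogue */
	spin_unlock_irqrestore(&m->lock, flags);

	return chan;
}
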
@@ -1434,9 +1537,8 @@
 {
 	blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
 
-	/* Ensure there are no gaps in presented sgls */
-	blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1);
-
+	/* storvsc devices don't support MAINTENANCE_IN SCSI cmd */
+	sdevice->no_report_opcodes = 1;
 	sdevice->no_write_same = 1;
 
 	/*
@@ -1499,6 +1601,7 @@
 
 	request = &stor_device->reset_request;
 	vstor_packet = &request->vstor_packet;
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 
 	init_completion(&request->wait_event);
 
@@ -1540,10 +1643,6 @@
  */
 static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
 {
-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
-	if (scmnd->device->host->transportt == fc_transport_template)
-		return fc_eh_timed_out(scmnd);
-#endif
 	return BLK_EH_RESET_TIMER;
 }
 
@@ -1602,6 +1701,7 @@
 	/* Setup the cmd request */
 	cmd_request->cmd = scmnd;
 
+	memset(&cmd_request->vstor_packet, 0, sizeof(struct vstor_packet));
 	vm_srb = &cmd_request->vstor_packet.vm_srb;
 	vm_srb->win8_extension.time_out_value = 60;
 
@@ -1654,26 +1754,68 @@
 
 	length = scsi_bufflen(scmnd);
 	payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
-	payload_sz = sizeof(cmd_request->mpb);
+	payload_sz = 0;
 
 	if (sg_count) {
-		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
+		unsigned int hvpgoff = 0;
+		unsigned long offset_in_hvpg = sgl->offset & ~HV_HYP_PAGE_MASK;
+		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
+		u64 hvpfn;
 
-			payload_sz = (sg_count * sizeof(u64) +
-				      sizeof(struct vmbus_packet_mpb_array));
+		payload_sz = (hvpg_count * sizeof(u64) +
+			      sizeof(struct vmbus_packet_mpb_array));
+
+		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
 			payload = kzalloc(payload_sz, GFP_ATOMIC);
 			if (!payload)
 				return SCSI_MLQUEUE_DEVICE_BUSY;
 		}
 
+		/*
+		 * sgl is a list of PAGEs, and payload->range.pfn_array
+		 * expects the page number in the unit of HV_HYP_PAGE_SIZE
+		 * (the page size that Hyper-V uses), so here we need to
+		 * divide PAGEs into HV_HYP_PAGEs in case that
+		 * PAGE_SIZE > HV_HYP_PAGE_SIZE. Besides,
+		 * payload->range.offset should be the offset in one
+		 * HV_HYP_PAGE.
+		 */
 		payload->range.len = length;
-		payload->range.offset = sgl[0].offset;
+		payload->range.offset = offset_in_hvpg;
+		hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;
 
 		cur_sgl = sgl;
-		for (i = 0; i < sg_count; i++) {
-			payload->range.pfn_array[i] =
-				page_to_pfn(sg_page((cur_sgl)));
-			cur_sgl = sg_next(cur_sgl);
+		for (i = 0; i < hvpg_count; i++) {
+			/*
+			 * 'i' is the index of hv pages in the payload and
+			 * 'hvpgoff' is the offset (in hv pages) of the first
+			 * hv page in the first page. The relationship between
+			 * the sum of 'i' and 'hvpgoff' and the offset (in hv
+			 * pages) in a payload page ('hvpgoff_in_page') is as
+			 * follows:
+			 *
+			 * |------------------ PAGE -------------------|
+			 * |   NR_HV_HYP_PAGES_IN_PAGE hvpgs in total  |
+			 * |hvpg|hvpg| ...       |hvpg|  ...      |hvpg|
+			 * ^         ^          ^                 ^
+			 * +-hvpgoff-+          +-hvpgoff_in_page-+
+			 *           ^                            |
+			 *           +------------- i ------------+
+			 */
+			unsigned int hvpgoff_in_page =
+				(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;
+
+			/*
+			 * Two cases that we need to fetch a page:
+			 * 1) i == 0: the first step, or
+			 * 2) hvpgoff_in_page == 0: when we reach the
+			 *    boundary of a page.
+			 */
+			if (hvpgoff_in_page == 0 || i == 0) {
+				hvpfn = page_to_hvpfn(sg_page(cur_sgl));
+				cur_sgl = sg_next(cur_sgl);
+			}
+
+			payload->range.pfn_array[i] = hvpfn + hvpgoff_in_page;
 		}
 	}
 
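
To make the index arithmetic concrete: with a hypothetical 16 KiB guest PAGE_SIZE over Hyper-V's fixed 4 KiB pages, NR_HV_HYP_PAGES_IN_PAGE is 4, and a buffer starting 12 KiB into its first page gives hvpgoff = 3. A standalone sketch of the loop's page-boundary logic (constants mocked, not the kernel's):

#include <stdio.h>

/* Mocked: 16 KiB guest pages over Hyper-V's fixed 4 KiB pages. */
#define HV_HYP_PAGE_SHIFT	12
#define PAGE_SHIFT		14
#define NR_HV_HYP_PAGES_IN_PAGE	(1 << (PAGE_SHIFT - HV_HYP_PAGE_SHIFT))	/* 4 */

int main(void)
{
	unsigned long sgl_offset = 0x3000;	/* buffer starts 12 KiB into the page */
	unsigned int hvpgoff = sgl_offset >> HV_HYP_PAGE_SHIFT;	/* 3 */

	/* Walk the first six hv pages of the payload. */
	for (unsigned int i = 0; i < 6; i++) {
		unsigned int hvpgoff_in_page =
			(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;

		/* A new guest page is fetched when the offset wraps to 0 (or at i == 0). */
		printf("i=%u hvpgoff_in_page=%u new_page=%d\n",
		       i, hvpgoff_in_page, hvpgoff_in_page == 0 || i == 0);
	}
	return 0;
}
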
@@ -1707,11 +1849,13 @@
 	.slave_configure = storvsc_device_configure,
 	.cmd_per_lun = 2048,
 	.this_id = -1,
-	.use_clustering = ENABLE_CLUSTERING,
 	/* Make sure we dont get a sg segment crosses a page boundary */
 	.dma_boundary = PAGE_SIZE-1,
+	/* Ensure there are no gaps in presented sgls */
+	.virt_boundary_mask = PAGE_SIZE-1,
 	.no_write_same = 1,
 	.track_queue_depth = 1,
+	.change_queue_depth = storvsc_change_queue_depth,
 };
 
 enum {
@@ -1738,6 +1882,13 @@
 };
 
 MODULE_DEVICE_TABLE(vmbus, id_table);
+
+static const struct { guid_t guid; } fc_guid = { HV_SYNTHFC_GUID };
+
+static bool hv_dev_is_fc(struct hv_device *hv_dev)
+{
+	return guid_equal(&fc_guid.guid, &hv_dev->dev_type);
+}
 
 static int storvsc_probe(struct hv_device *device,
 			 const struct hv_vmbus_device_id *dev_id)
@@ -1803,10 +1954,10 @@
 	}
 
 	stor_device->destroy = false;
-	stor_device->open_sub_channel = false;
 	init_waitqueue_head(&stor_device->waiting_to_drain);
 	stor_device->device = device;
 	stor_device->host = host;
+	spin_lock_init(&stor_device->lock);
 	hv_set_drvdata(device, stor_device);
 
 	stor_device->port_number = host->host_no;
@@ -1848,20 +1999,23 @@
 	 */
 	host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
 	/*
+	 * For non-IDE disks, the host supports multiple channels.
 	 * Set the number of HW queues we are supporting.
 	 */
-	if (stor_device->num_sc != 0)
-		host->nr_hw_queues = stor_device->num_sc + 1;
+	if (!dev_is_ide)
+		host->nr_hw_queues = num_present_cpus();
 
 	/*
 	 * Set the error handler work queue.
 	 */
 	host_dev->handle_error_wq =
			alloc_ordered_workqueue("storvsc_error_wq_%d",
-						WQ_MEM_RECLAIM,
+						0,
						host->host_no);
-	if (!host_dev->handle_error_wq)
+	if (!host_dev->handle_error_wq) {
+		ret = -ENOMEM;
 		goto err_out2;
+	}
 	INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan);
 	/* Register the HBA and start the scsi bus scan */
 	ret = scsi_add_host(host, &device->device);
@@ -1919,6 +2073,15 @@
 	return ret;
 }
 
+/* Change a scsi target's queue depth */
+static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+{
+	if (queue_depth > scsi_driver.can_queue)
+		queue_depth = scsi_driver.can_queue;
+
+	return scsi_change_queue_depth(sdev, queue_depth);
+}
+
 static int storvsc_remove(struct hv_device *dev)
 {
 	struct storvsc_device *stor_device = hv_get_drvdata(dev);
@@ -1939,11 +2102,42 @@
 	return 0;
 }
 
+static int storvsc_suspend(struct hv_device *hv_dev)
+{
+	struct storvsc_device *stor_device = hv_get_drvdata(hv_dev);
+	struct Scsi_Host *host = stor_device->host;
+	struct hv_host_device *host_dev = shost_priv(host);
+
+	storvsc_wait_to_drain(stor_device);
+
+	drain_workqueue(host_dev->handle_error_wq);
+
+	vmbus_close(hv_dev->channel);
+
+	kfree(stor_device->stor_chns);
+	stor_device->stor_chns = NULL;
+
+	cpumask_clear(&stor_device->alloced_cpus);
+
+	return 0;
+}
+
+static int storvsc_resume(struct hv_device *hv_dev)
+{
+	int ret;
+
+	ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
+				     hv_dev_is_fc(hv_dev));
+	return ret;
+}
+
 static struct hv_driver storvsc_drv = {
 	.name = KBUILD_MODNAME,
 	.id_table = id_table,
 	.probe = storvsc_probe,
 	.remove = storvsc_remove,
+	.suspend = storvsc_suspend,
+	.resume = storvsc_resume,
 	.driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},