2024-02-20 102a0743326a03cd1a1202ceda21e175b7d3575c
kernel/drivers/scsi/storvsc_drv.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2009, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
  *
  * Authors:
  *   Haiyang Zhang <haiyangz@microsoft.com>
@@ -368,16 +356,21 @@
 };
 
 /*
- * SRB status codes and masks; a subset of the codes used here.
+ * SRB status codes and masks. In the 8-bit field, the two high order bits
+ * are flags, while the remaining 6 bits are an integer status code. The
+ * definitions here include only the subset of the integer status codes that
+ * are tested for in this driver.
  */
-
 #define SRB_STATUS_AUTOSENSE_VALID	0x80
 #define SRB_STATUS_QUEUE_FROZEN		0x40
-#define SRB_STATUS_INVALID_LUN		0x20
-#define SRB_STATUS_SUCCESS		0x01
-#define SRB_STATUS_ABORTED		0x02
-#define SRB_STATUS_ERROR		0x04
-#define SRB_STATUS_DATA_OVERRUN		0x12
+
+/* SRB status integer codes */
+#define SRB_STATUS_SUCCESS		0x01
+#define SRB_STATUS_ABORTED		0x02
+#define SRB_STATUS_ERROR		0x04
+#define SRB_STATUS_INVALID_REQUEST	0x06
+#define SRB_STATUS_DATA_OVERRUN		0x12
+#define SRB_STATUS_INVALID_LUN		0x20
 
 #define SRB_STATUS(status) \
 	(status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
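
The SRB_STATUS() macro strips the two flag bits so the switch in storvsc_handle_error() can compare the remaining 6-bit integer code directly. A minimal standalone illustration of the decoding (not driver code; the constants are copied from the defines above):

#include <assert.h>

#define SRB_STATUS_AUTOSENSE_VALID	0x80
#define SRB_STATUS_QUEUE_FROZEN		0x40
#define SRB_STATUS_ERROR		0x04
#define SRB_STATUS(status) \
	(status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))

int main(void)
{
	/* 0x84 = AUTOSENSE_VALID flag (0x80) | ERROR code (0x04). */
	unsigned char srb_status = 0x84;

	assert(SRB_STATUS(srb_status) == SRB_STATUS_ERROR);
	assert(srb_status & SRB_STATUS_AUTOSENSE_VALID);
	return 0;
}
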
@@ -385,8 +378,9 @@
  * This is the end of Protocol specific defines.
  */
 
-static int storvsc_ringbuffer_size = (256 * PAGE_SIZE);
+static int storvsc_ringbuffer_size = (128 * 1024);
 static u32 max_outstanding_req_per_channel;
+static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth);
 
 static int storvsc_vcpus_per_sub_channel = 4;
 
@@ -446,7 +440,6 @@
 
 	bool	destroy;
 	bool	drain_notify;
-	bool	open_sub_channel;
 	atomic_t num_outstanding_req;
 	struct Scsi_Host *host;
 
@@ -474,6 +467,11 @@
 	 * Mask of CPUs bound to subchannels.
 	 */
 	struct cpumask alloced_cpus;
+	/*
+	 * Serializes modifications of stor_chns[] from storvsc_do_io()
+	 * and storvsc_change_target_cpu().
+	 */
+	spinlock_t lock;
 	/* Used for vsc/vsp channel reset process */
 	struct storvsc_cmd_request init_request;
 	struct storvsc_cmd_request reset_request;
@@ -633,36 +631,101 @@
 
 }
 
+static void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old,
+				      u32 new)
+{
+	struct storvsc_device *stor_device;
+	struct vmbus_channel *cur_chn;
+	bool old_is_alloced = false;
+	struct hv_device *device;
+	unsigned long flags;
+	int cpu;
+
+	device = channel->primary_channel ?
+			channel->primary_channel->device_obj
+				: channel->device_obj;
+	stor_device = get_out_stor_device(device);
+	if (!stor_device)
+		return;
+
+	/* See storvsc_do_io() -> get_og_chn(). */
+	spin_lock_irqsave(&stor_device->lock, flags);
+
+	/*
+	 * Determines if the storvsc device has other channels assigned to
+	 * the "old" CPU to update the alloced_cpus mask and the stor_chns
+	 * array.
+	 */
+	if (device->channel != channel && device->channel->target_cpu == old) {
+		cur_chn = device->channel;
+		old_is_alloced = true;
+		goto old_is_alloced;
+	}
+	list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
+		if (cur_chn == channel)
+			continue;
+		if (cur_chn->target_cpu == old) {
+			old_is_alloced = true;
+			goto old_is_alloced;
+		}
+	}
+
+old_is_alloced:
+	if (old_is_alloced)
+		WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
+	else
+		cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+	/* "Flush" the stor_chns array. */
+	for_each_possible_cpu(cpu) {
+		if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
+					cpu, &stor_device->alloced_cpus))
+			WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+	}
+
+	WRITE_ONCE(stor_device->stor_chns[new], channel);
+	cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+	spin_unlock_irqrestore(&stor_device->lock, flags);
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
 	struct hv_device *device = new_sc->primary_channel->device_obj;
+	struct device *dev = &device->device;
 	struct storvsc_device *stor_device;
 	struct vmstorage_channel_properties props;
+	int ret;
 
 	stor_device = get_out_stor_device(device);
 	if (!stor_device)
 		return;
 
-	if (stor_device->open_sub_channel == false)
-		return;
-
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
-	vmbus_open(new_sc,
-		   storvsc_ringbuffer_size,
-		   storvsc_ringbuffer_size,
-		   (void *)&props,
-		   sizeof(struct vmstorage_channel_properties),
-		   storvsc_on_channel_callback, new_sc);
+	ret = vmbus_open(new_sc,
+			 storvsc_ringbuffer_size,
			 storvsc_ringbuffer_size,
+			 (void *)&props,
+			 sizeof(struct vmstorage_channel_properties),
+			 storvsc_on_channel_callback, new_sc);
 
-	if (new_sc->state == CHANNEL_OPENED_STATE) {
-		stor_device->stor_chns[new_sc->target_cpu] = new_sc;
-		cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+	/* In case vmbus_open() fails, we don't use the sub-channel. */
+	if (ret != 0) {
+		dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
+		return;
 	}
+
+	new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
+	/* Add the sub-channel to the array of available channels. */
+	stor_device->stor_chns[new_sc->target_cpu] = new_sc;
+	cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
 }
 
 static void handle_multichannel_storage(struct hv_device *device, int max_chns)
 {
+	struct device *dev = &device->device;
 	struct storvsc_device *stor_device;
 	int num_sc;
 	struct storvsc_cmd_request *request;
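
Note on the locking scheme introduced here: stor_chns[] is modified only with stor_device->lock held and published with WRITE_ONCE(), while the hot path in storvsc_do_io() reads the array locklessly with READ_ONCE(). A hypothetical miniature of the pattern (chn_map and its fields are illustrative, not the driver's types):

#include <linux/spinlock.h>
#include <linux/threads.h>

struct chn_map {
	spinlock_t lock;
	void *chns[NR_CPUS];
};

/* Writer: serialized by the lock, publishes the pointer atomically. */
static void map_set(struct chn_map *m, int cpu, void *chan)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	WRITE_ONCE(m->chns[cpu], chan);
	spin_unlock_irqrestore(&m->lock, flags);
}

/* Reader: lockless; may see the old or the new pointer, never a torn one. */
static void *map_get(struct chn_map *m, int cpu)
{
	return READ_ONCE(m->chns[cpu]);
}
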
@@ -688,21 +751,11 @@
 	request = &stor_device->init_request;
 	vstor_packet = &request->vstor_packet;
 
-	stor_device->open_sub_channel = true;
 	/*
 	 * Establish a handler for dealing with subchannels.
 	 */
 	vmbus_set_sc_create_callback(device->channel, handle_sc_creation);
 
-	/*
-	 * Check to see if sub-channels have already been created. This
-	 * can happen when this driver is re-loaded after unloading.
-	 */
-
-	if (vmbus_are_subchannels_present(device->channel))
-		return;
-
-	stor_device->open_sub_channel = false;
 	/*
 	 * Request the host to create sub-channels.
 	 */
@@ -719,23 +772,29 @@
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0)
+	if (ret != 0) {
+		dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
 		return;
+	}
 
 	t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
-	if (t == 0)
+	if (t == 0) {
+		dev_err(dev, "Failed to create sub-channel: timed out\n");
 		return;
+	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
-	    vstor_packet->status != 0)
+	    vstor_packet->status != 0) {
+		dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
+			vstor_packet->operation, vstor_packet->status);
 		return;
+	}
 
 	/*
-	 * Now that we created the sub-channels, invoke the check; this
-	 * may trigger the callback.
+	 * We need to do nothing here, because vmbus_process_offer()
+	 * invokes channel->sc_creation_callback, which will open and use
+	 * the sub-channel(s).
 	 */
-	stor_device->open_sub_channel = true;
-	vmbus_are_subchannels_present(device->channel);
 }
 
 static void cache_wwn(struct storvsc_device *stor_device,
@@ -887,6 +946,8 @@
 	if (stor_device->stor_chns == NULL)
 		return -ENOMEM;
 
+	device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
 	stor_device->stor_chns[device->channel->target_cpu] = device->channel;
 	cpumask_set_cpu(device->channel->target_cpu,
 			&stor_device->alloced_cpus);
@@ -938,17 +999,43 @@
 	struct storvsc_scan_work *wrk;
 	void (*process_err_fn)(struct work_struct *work);
 	struct hv_host_device *host_dev = shost_priv(host);
-	bool do_work = false;
 
 	switch (SRB_STATUS(vm_srb->srb_status)) {
 	case SRB_STATUS_ERROR:
-		/*
-		 * Let upper layer deal with error when
-		 * sense message is present.
-		 */
+	case SRB_STATUS_ABORTED:
+	case SRB_STATUS_INVALID_REQUEST:
+		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
+			/* Check for capacity change */
+			if ((asc == 0x2a) && (ascq == 0x9)) {
+				process_err_fn = storvsc_device_scan;
+				/* Retry the I/O that triggered this. */
+				set_host_byte(scmnd, DID_REQUEUE);
+				goto do_work;
+			}
 
-		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)
-			break;
+			/*
+			 * Check for "Operating parameters have changed"
+			 * due to Hyper-V changing the VHD/VHDX BlockSize
+			 * when adding/removing a differencing disk. This
+			 * causes discard_granularity to change, so do a
+			 * rescan to pick up the new granularity. We don't
+			 * want scsi_report_sense() to output a message
+			 * that a sysadmin wouldn't know what to do with.
+			 */
+			if ((asc == 0x3f) && (ascq != 0x03) &&
+					(ascq != 0x0e)) {
+				process_err_fn = storvsc_device_scan;
+				set_host_byte(scmnd, DID_REQUEUE);
+				goto do_work;
+			}
+
+			/*
+			 * Otherwise, let the upper layer deal with the
+			 * error when a sense message is present.
+			 */
+			return;
+		}
+
 		/*
 		 * If there is an error; offline the device since all
 		 * error recovery strategies would have already been
@@ -961,37 +1048,26 @@
 			set_host_byte(scmnd, DID_PASSTHROUGH);
 			break;
 		/*
-		 * On Some Windows hosts TEST_UNIT_READY command can return
-		 * SRB_STATUS_ERROR, let the upper level code deal with it
-		 * based on the sense information.
+		 * On some Hyper-V hosts TEST_UNIT_READY command can
+		 * return SRB_STATUS_ERROR. Let the upper level code
+		 * deal with it based on the sense information.
 		 */
 		case TEST_UNIT_READY:
 			break;
 		default:
 			set_host_byte(scmnd, DID_ERROR);
 		}
-		break;
-	case SRB_STATUS_INVALID_LUN:
-		set_host_byte(scmnd, DID_NO_CONNECT);
-		do_work = true;
-		process_err_fn = storvsc_remove_lun;
-		break;
-	case SRB_STATUS_ABORTED:
-		if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID &&
-		    (asc == 0x2a) && (ascq == 0x9)) {
-			do_work = true;
-			process_err_fn = storvsc_device_scan;
-			/*
-			 * Retry the I/O that trigerred this.
-			 */
-			set_host_byte(scmnd, DID_REQUEUE);
-		}
-		break;
-	}
-
-	if (!do_work)
 		return;
 
+	case SRB_STATUS_INVALID_LUN:
+		set_host_byte(scmnd, DID_NO_CONNECT);
+		process_err_fn = storvsc_remove_lun;
+		goto do_work;
+
+	}
+	return;
+
+do_work:
 	/*
 	 * We need to schedule work to process this error; schedule it.
 	 */
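
The asc/ascq pair tested above is taken from the sense data returned with the failed command. As a hedged sketch of where such values come from (scsi_normalize_sense() and struct scsi_sense_hdr are the in-kernel API; the helper name is illustrative):

#include <scsi/scsi_common.h>

/* Illustrative: true for the "capacity data has changed" sense (0x2a/0x09). */
static bool sense_is_capacity_change(const u8 *sense, int len)
{
	struct scsi_sense_hdr sshdr;

	if (!scsi_normalize_sense(sense, len, &sshdr))
		return false;

	return sshdr.asc == 0x2a && sshdr.ascq == 0x09;
}
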
@@ -1049,6 +1125,10 @@
 		data_transfer_length = 0;
 	}
 
+	/* Validate data_transfer_length (from Hyper-V) */
+	if (data_transfer_length > cmd_request->payload->range.len)
+		data_transfer_length = cmd_request->payload->range.len;
+
 	scsi_set_resid(scmnd,
 		cmd_request->payload->range.len - data_transfer_length);
 
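
The clamp matters because the residual below is computed with unsigned arithmetic: a host-reported transfer length larger than the request would make the subtraction wrap instead of going negative. A standalone illustration with made-up numbers:

#include <stdio.h>

int main(void)
{
	unsigned int len  = 4096;	/* bytes requested (payload->range.len) */
	unsigned int xfer = 8192;	/* bogus transfer length from the host  */

	if (xfer > len)			/* the validation added above */
		xfer = len;

	printf("resid = %u\n", len - xfer);	/* 0, not a huge wrapped value */
	return 0;
}
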
@@ -1089,6 +1169,11 @@
 	/* Copy over the status...etc */
 	stor_pkt->vm_srb.scsi_status = vstor_packet->vm_srb.scsi_status;
 	stor_pkt->vm_srb.srb_status = vstor_packet->vm_srb.srb_status;
+
+	/* Validate sense_info_length (from Hyper-V) */
+	if (vstor_packet->vm_srb.sense_info_length > sense_buffer_size)
+		vstor_packet->vm_srb.sense_info_length = sense_buffer_size;
+
 	stor_pkt->vm_srb.sense_info_length =
 		vstor_packet->vm_srb.sense_info_length;
 
@@ -1259,8 +1344,10 @@
 	const struct cpumask *node_mask;
 	int num_channels, tgt_cpu;
 
-	if (stor_device->num_sc == 0)
+	if (stor_device->num_sc == 0) {
+		stor_device->stor_chns[q_num] = stor_device->device->channel;
 		return stor_device->device->channel;
+	}
 
 	/*
 	 * Our channel array is sparsley populated and we
@@ -1269,7 +1356,6 @@
 	 * The strategy is simple:
 	 * I. Ensure NUMA locality
 	 * II. Distribute evenly (best effort)
-	 * III. Mapping is persistent.
 	 */
 
 	node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1279,8 +1365,10 @@
 		if (cpumask_test_cpu(tgt_cpu, node_mask))
 			num_channels++;
 	}
-	if (num_channels == 0)
+	if (num_channels == 0) {
+		stor_device->stor_chns[q_num] = stor_device->device->channel;
 		return stor_device->device->channel;
+	}
 
 	hash_qnum = q_num;
 	while (hash_qnum >= num_channels)
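
The loop that follows reduces hash_qnum into the range [0, num_channels) by repeated subtraction, i.e. a modulo. A standalone check with sample values:

#include <assert.h>

int main(void)
{
	unsigned int q_num = 11, num_channels = 4;
	unsigned int hash_qnum = q_num;

	/* Same reduction as in get_og_chn(). */
	while (hash_qnum >= num_channels)
		hash_qnum -= num_channels;

	assert(hash_qnum == q_num % num_channels);	/* 11 % 4 == 3 */
	return 0;
}
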
@@ -1306,6 +1394,7 @@
 	struct storvsc_device *stor_device;
 	struct vstor_packet *vstor_packet;
 	struct vmbus_channel *outgoing_channel, *channel;
+	unsigned long flags;
 	int ret = 0;
 	const struct cpumask *node_mask;
 	int tgt_cpu;
@@ -1319,10 +1408,11 @@
 
 	request->device = device;
 	/*
-	 * Select an an appropriate channel to send the request out.
+	 * Select an appropriate channel to send the request out.
 	 */
-	if (stor_device->stor_chns[q_num] != NULL) {
-		outgoing_channel = stor_device->stor_chns[q_num];
+	/* See storvsc_change_target_cpu(). */
+	outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+	if (outgoing_channel != NULL) {
 		if (outgoing_channel->target_cpu == q_num) {
 			/*
 			 * Ideally, we want to pick a different channel if
@@ -1335,7 +1425,10 @@
 					continue;
 				if (tgt_cpu == q_num)
 					continue;
-				channel = stor_device->stor_chns[tgt_cpu];
+				channel = READ_ONCE(
+					stor_device->stor_chns[tgt_cpu]);
+				if (channel == NULL)
+					continue;
 				if (hv_get_avail_to_write_percent(
 							&channel->outbound)
 						> ring_avail_percent_lowater) {
@@ -1361,7 +1454,10 @@
 			for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
 				if (cpumask_test_cpu(tgt_cpu, node_mask))
 					continue;
-				channel = stor_device->stor_chns[tgt_cpu];
+				channel = READ_ONCE(
+					stor_device->stor_chns[tgt_cpu]);
+				if (channel == NULL)
+					continue;
 				if (hv_get_avail_to_write_percent(
 							&channel->outbound)
 						> ring_avail_percent_lowater) {
@@ -1371,7 +1467,14 @@
 			}
 		}
 	} else {
+		spin_lock_irqsave(&stor_device->lock, flags);
+		outgoing_channel = stor_device->stor_chns[q_num];
+		if (outgoing_channel != NULL) {
+			spin_unlock_irqrestore(&stor_device->lock, flags);
+			goto found_channel;
+		}
 		outgoing_channel = get_og_chn(stor_device, q_num);
+		spin_unlock_irqrestore(&stor_device->lock, flags);
 	}
 
 found_channel:
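
The else branch above is a double-checked lookup: a miss on the lockless READ_ONCE() is re-tested under stor_device->lock before get_og_chn() is allowed to repopulate the slot, so two CPUs racing on the same empty entry cannot both rebuild the mapping. A hypothetical miniature, reusing the chn_map sketch from earlier (build_mapping() stands in for get_og_chn()):

static void *map_get_or_build(struct chn_map *m, int q)
{
	unsigned long flags;
	void *chan = READ_ONCE(m->chns[q]);	/* lockless fast path */

	if (chan)
		return chan;

	spin_lock_irqsave(&m->lock, flags);
	chan = m->chns[q];			/* re-check under the lock */
	if (!chan)
		chan = build_mapping(m, q);	/* get_og_chn() analogue */
	spin_unlock_irqrestore(&m->lock, flags);

	return chan;
}
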
@@ -1434,9 +1537,8 @@
 {
 	blk_queue_rq_timeout(sdevice->request_queue, (storvsc_timeout * HZ));
 
-	/* Ensure there are no gaps in presented sgls */
-	blk_queue_virt_boundary(sdevice->request_queue, PAGE_SIZE - 1);
-
+	/* storvsc devices don't support MAINTENANCE_IN SCSI cmd */
+	sdevice->no_report_opcodes = 1;
 	sdevice->no_write_same = 1;
 
 	/*
@@ -1499,6 +1601,7 @@
 
 	request = &stor_device->reset_request;
 	vstor_packet = &request->vstor_packet;
+	memset(vstor_packet, 0, sizeof(struct vstor_packet));
 
 	init_completion(&request->wait_event);
 
@@ -1540,10 +1643,6 @@
  */
 static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
 {
-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
-	if (scmnd->device->host->transportt == fc_transport_template)
-		return fc_eh_timed_out(scmnd);
-#endif
 	return BLK_EH_RESET_TIMER;
 }
 
@@ -1602,6 +1701,7 @@
 	/* Setup the cmd request */
 	cmd_request->cmd = scmnd;
 
+	memset(&cmd_request->vstor_packet, 0, sizeof(struct vstor_packet));
 	vm_srb = &cmd_request->vstor_packet.vm_srb;
 	vm_srb->win8_extension.time_out_value = 60;
 
@@ -1654,26 +1754,68 @@
 
 	length = scsi_bufflen(scmnd);
 	payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb;
-	payload_sz = sizeof(cmd_request->mpb);
+	payload_sz = 0;
 
 	if (sg_count) {
-		if (sg_count > MAX_PAGE_BUFFER_COUNT) {
+		unsigned int hvpgoff = 0;
+		unsigned long offset_in_hvpg = sgl->offset & ~HV_HYP_PAGE_MASK;
+		unsigned int hvpg_count = HVPFN_UP(offset_in_hvpg + length);
+		u64 hvpfn;
 
-			payload_sz = (sg_count * sizeof(u64) +
-				      sizeof(struct vmbus_packet_mpb_array));
+		payload_sz = (hvpg_count * sizeof(u64) +
+			      sizeof(struct vmbus_packet_mpb_array));
+
+		if (hvpg_count > MAX_PAGE_BUFFER_COUNT) {
 			payload = kzalloc(payload_sz, GFP_ATOMIC);
 			if (!payload)
 				return SCSI_MLQUEUE_DEVICE_BUSY;
 		}
 
+		/*
+		 * sgl is a list of PAGEs, and payload->range.pfn_array
+		 * expects the page number in the unit of HV_HYP_PAGE_SIZE
+		 * (the page size that Hyper-V uses), so here we need to
+		 * divide PAGEs into HV_HYP_PAGEs in case that
+		 * PAGE_SIZE > HV_HYP_PAGE_SIZE. Besides,
+		 * payload->range.offset should be the offset in one
+		 * HV_HYP_PAGE.
+		 */
 		payload->range.len = length;
-		payload->range.offset = sgl[0].offset;
+		payload->range.offset = offset_in_hvpg;
+		hvpgoff = sgl->offset >> HV_HYP_PAGE_SHIFT;
 
 		cur_sgl = sgl;
-		for (i = 0; i < sg_count; i++) {
-			payload->range.pfn_array[i] =
-				page_to_pfn(sg_page((cur_sgl)));
-			cur_sgl = sg_next(cur_sgl);
+		for (i = 0; i < hvpg_count; i++) {
+			/*
+			 * 'i' is the index of hv pages in the payload and
+			 * 'hvpgoff' is the offset (in hv pages) of the first
+			 * hv page in the first page. The relationship between
+			 * the sum of 'i' and 'hvpgoff' and the offset (in hv
+			 * pages) in a payload page ('hvpgoff_in_page') is as
+			 * follows:
+			 *
+			 * |------------------ PAGE -------------------|
+			 * |   NR_HV_HYP_PAGES_IN_PAGE hvpgs in total  |
+			 * |hvpg|hvpg| ...       |hvpg|  ...      |hvpg|
+			 * ^         ^          ^                 ^
+			 * +-hvpgoff-+          +-hvpgoff_in_page-+
+			 *           ^                            |
+			 *           +------------- i ------------+
+			 */
+			unsigned int hvpgoff_in_page =
+				(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;
+
+			/*
+			 * Two cases that we need to fetch a page:
+			 * 1) i == 0: the first step, or
+			 * 2) hvpgoff_in_page == 0: when we reach the
+			 *    boundary of a page.
+			 */
+			if (hvpgoff_in_page == 0 || i == 0) {
+				hvpfn = page_to_hvpfn(sg_page(cur_sgl));
+				cur_sgl = sg_next(cur_sgl);
+			}
+
+			payload->range.pfn_array[i] = hvpfn + hvpgoff_in_page;
 		}
 	}
 
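
To make the index arithmetic concrete: with a hypothetical 16 KiB guest PAGE_SIZE over Hyper-V's fixed 4 KiB pages, NR_HV_HYP_PAGES_IN_PAGE is 4, and a buffer starting 12 KiB into its first page gives hvpgoff = 3. A standalone sketch of the loop's page-boundary logic (constants mocked, not the kernel's):

#include <stdio.h>

/* Mocked: 16 KiB guest pages over Hyper-V's fixed 4 KiB pages. */
#define HV_HYP_PAGE_SHIFT	12
#define PAGE_SHIFT		14
#define NR_HV_HYP_PAGES_IN_PAGE	(1 << (PAGE_SHIFT - HV_HYP_PAGE_SHIFT))	/* 4 */

int main(void)
{
	unsigned long sgl_offset = 0x3000;	/* buffer starts 12 KiB into the page */
	unsigned int hvpgoff = sgl_offset >> HV_HYP_PAGE_SHIFT;	/* 3 */

	/* Walk the first six hv pages of the payload. */
	for (unsigned int i = 0; i < 6; i++) {
		unsigned int hvpgoff_in_page =
			(i + hvpgoff) % NR_HV_HYP_PAGES_IN_PAGE;

		/* A new guest page is fetched when the offset wraps to 0 (or at i == 0). */
		printf("i=%u hvpgoff_in_page=%u new_page=%d\n",
		       i, hvpgoff_in_page, hvpgoff_in_page == 0 || i == 0);
	}
	return 0;
}
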
@@ -1707,11 +1849,13 @@
 	.slave_configure = storvsc_device_configure,
 	.cmd_per_lun = 2048,
 	.this_id = -1,
-	.use_clustering = ENABLE_CLUSTERING,
 	/* Make sure we dont get a sg segment crosses a page boundary */
 	.dma_boundary = PAGE_SIZE-1,
+	/* Ensure there are no gaps in presented sgls */
+	.virt_boundary_mask = PAGE_SIZE-1,
 	.no_write_same = 1,
 	.track_queue_depth = 1,
+	.change_queue_depth = storvsc_change_queue_depth,
 };
 
 enum {
@@ -1738,6 +1882,13 @@
 };
 
 MODULE_DEVICE_TABLE(vmbus, id_table);
+
+static const struct { guid_t guid; } fc_guid = { HV_SYNTHFC_GUID };
+
+static bool hv_dev_is_fc(struct hv_device *hv_dev)
+{
+	return guid_equal(&fc_guid.guid, &hv_dev->dev_type);
+}
 
 static int storvsc_probe(struct hv_device *device,
 			 const struct hv_vmbus_device_id *dev_id)
@@ -1803,10 +1954,10 @@
 	}
 
 	stor_device->destroy = false;
-	stor_device->open_sub_channel = false;
 	init_waitqueue_head(&stor_device->waiting_to_drain);
 	stor_device->device = device;
 	stor_device->host = host;
+	spin_lock_init(&stor_device->lock);
 	hv_set_drvdata(device, stor_device);
 
 	stor_device->port_number = host->host_no;
@@ -1848,20 +1999,23 @@
 	 */
 	host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
 	/*
+	 * For non-IDE disks, the host supports multiple channels.
 	 * Set the number of HW queues we are supporting.
 	 */
-	if (stor_device->num_sc != 0)
-		host->nr_hw_queues = stor_device->num_sc + 1;
+	if (!dev_is_ide)
+		host->nr_hw_queues = num_present_cpus();
 
 	/*
 	 * Set the error handler work queue.
 	 */
 	host_dev->handle_error_wq =
			alloc_ordered_workqueue("storvsc_error_wq_%d",
-						WQ_MEM_RECLAIM,
+						0,
						host->host_no);
-	if (!host_dev->handle_error_wq)
+	if (!host_dev->handle_error_wq) {
+		ret = -ENOMEM;
 		goto err_out2;
+	}
 	INIT_WORK(&host_dev->host_scan_work, storvsc_host_scan);
 	/* Register the HBA and start the scsi bus scan */
 	ret = scsi_add_host(host, &device->device);
@@ -1919,6 +2073,15 @@
 	return ret;
 }
 
+/* Change a scsi target's queue depth */
+static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth)
+{
+	if (queue_depth > scsi_driver.can_queue)
+		queue_depth = scsi_driver.can_queue;
+
+	return scsi_change_queue_depth(sdev, queue_depth);
+}
+
 static int storvsc_remove(struct hv_device *dev)
 {
 	struct storvsc_device *stor_device = hv_get_drvdata(dev);
@@ -1939,11 +2102,42 @@
 	return 0;
 }
 
+static int storvsc_suspend(struct hv_device *hv_dev)
+{
+	struct storvsc_device *stor_device = hv_get_drvdata(hv_dev);
+	struct Scsi_Host *host = stor_device->host;
+	struct hv_host_device *host_dev = shost_priv(host);
+
+	storvsc_wait_to_drain(stor_device);
+
+	drain_workqueue(host_dev->handle_error_wq);
+
+	vmbus_close(hv_dev->channel);
+
+	kfree(stor_device->stor_chns);
+	stor_device->stor_chns = NULL;
+
+	cpumask_clear(&stor_device->alloced_cpus);
+
+	return 0;
+}
+
+static int storvsc_resume(struct hv_device *hv_dev)
+{
+	int ret;
+
+	ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size,
+				     hv_dev_is_fc(hv_dev));
+	return ret;
+}
+
 static struct hv_driver storvsc_drv = {
 	.name = KBUILD_MODNAME,
 	.id_table = id_table,
 	.probe = storvsc_probe,
 	.remove = storvsc_remove,
+	.suspend = storvsc_suspend,
+	.resume = storvsc_resume,
 	.driver = {
 		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
 	},