  .. |   .. |
     |    1 | +// SPDX-License-Identifier: GPL-2.0-only
   1 |    2 |  /*
   2 |    3 |   * Copyright (c) 2009, Microsoft Corporation.
   3 |      | - *
   4 |      | - * This program is free software; you can redistribute it and/or modify it
   5 |      | - * under the terms and conditions of the GNU General Public License,
   6 |      | - * version 2, as published by the Free Software Foundation.
   7 |      | - *
   8 |      | - * This program is distributed in the hope it will be useful, but WITHOUT
   9 |      | - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 |      | - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11 |      | - * more details.
  12 |      | - *
  13 |      | - * You should have received a copy of the GNU General Public License along with
  14 |      | - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  15 |      | - * Place - Suite 330, Boston, MA 02111-1307 USA.
  16 |    4 |   *
  17 |    5 |   * Authors:
  18 |    6 |   *   Haiyang Zhang <haiyangz@microsoft.com>
  .. |   .. |
  30 |   18 |  #include <linux/module.h>
  31 |   19 |  #include <linux/completion.h>
  32 |   20 |  #include <linux/delay.h>
     |   21 | +#include <linux/cpu.h>
  33 |   22 |  #include <linux/hyperv.h>
  34 |   23 |  #include <asm/mshyperv.h>
  35 |   24 | 
  36 |   25 |  #include "hyperv_vmbus.h"
  37 |   26 | 
  38 |      | -static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
     |   27 | +static void init_vp_index(struct vmbus_channel *channel);
  39 |   28 | 
  40 |      | -static const struct vmbus_device vmbus_devs[] = {
     |   29 | +const struct vmbus_device vmbus_devs[] = {
  41 |   30 |      /* IDE */
  42 |   31 |      { .dev_type = HV_IDE,
  43 |   32 |        HV_IDE_GUID,
  .. |   .. |
 141 |  130 |  };
 142 |  131 | 
 143 |  132 |  static const struct {
 144 |      | -    uuid_le guid;
     |  133 | +    guid_t guid;
 145 |  134 |  } vmbus_unsupported_devs[] = {
 146 |  135 |      { HV_AVMA1_GUID },
 147 |  136 |      { HV_AVMA2_GUID },
  .. |   .. |
 171 |  160 |      spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 172 |  161 |  }
 173 |  162 | 
 174 |      | -static bool is_unsupported_vmbus_devs(const uuid_le *guid)
     |  163 | +static bool is_unsupported_vmbus_devs(const guid_t *guid)
 175 |  164 |  {
 176 |  165 |      int i;
 177 |  166 | 
 178 |  167 |      for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
 179 |      | -        if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
     |  168 | +        if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
 180 |  169 |              return true;
 181 |  170 |      return false;
 182 |  171 |  }
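
A note on the predicate flip in this hunk: uuid_le_cmp() follows memcmp() conventions and returns 0 on a match, hence the old !uuid_le_cmp(...); guid_equal() returns true on a match, so the negation disappears. For reference, guid_equal() in include/linux/uuid.h is essentially a memcmp() wrapper along these lines:

    static inline bool guid_equal(const guid_t *u1, const guid_t *u2)
    {
        return memcmp(u1, u2, sizeof(guid_t)) == 0;
    }
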
---|
 183 |  172 | 
 184 |  173 |  static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 185 |  174 |  {
 186 |      | -    const uuid_le *guid = &channel->offermsg.offer.if_type;
     |  175 | +    const guid_t *guid = &channel->offermsg.offer.if_type;
 187 |  176 |      u16 i;
 188 |  177 | 
 189 |  178 |      if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
 190 |  179 |          return HV_UNKNOWN;
 191 |  180 | 
 192 |  181 |      for (i = HV_IDE; i < HV_UNKNOWN; i++) {
 193 |      | -        if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
     |  182 | +        if (guid_equal(guid, &vmbus_devs[i].guid))
 194 |  183 |              return i;
 195 |  184 |      }
 196 |  185 |      pr_info("Unknown GUID: %pUl\n", guid);
  .. |   .. |
 198 |  187 |  }
 199 |  188 | 
 200 |  189 |  /**
 201 |      | - * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
     |  190 | + * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 202 |  191 |   * @icmsghdrp: Pointer to msg header structure
 203 |      | - * @icmsg_negotiate: Pointer to negotiate message structure
 204 |  192 |   * @buf: Raw buffer channel data
     |  193 | + * @fw_version: The framework versions we can support.
     |  194 | + * @fw_vercnt: The size of @fw_version.
     |  195 | + * @srv_version: The service versions we can support.
     |  196 | + * @srv_vercnt: The size of @srv_version.
     |  197 | + * @nego_fw_version: The selected framework version.
     |  198 | + * @nego_srv_version: The selected service version.
 205 |  199 |   *
 206 |      | - * @icmsghdrp is of type &struct icmsg_hdr.
     |  200 | + * Note: Versions are given in decreasing order.
     |  201 | + *
 207 |  202 |   * Set up and fill in default negotiate response message.
 208 |      | - *
 209 |      | - * The fw_version and fw_vercnt specifies the framework version that
 210 |      | - * we can support.
 211 |      | - *
 212 |      | - * The srv_version and srv_vercnt specifies the service
 213 |      | - * versions we can support.
 214 |      | - *
 215 |      | - * Versions are given in decreasing order.
 216 |      | - *
 217 |      | - * nego_fw_version and nego_srv_version store the selected protocol versions.
 218 |      | - *
 219 |  203 |   * Mainly used by Hyper-V drivers.
 220 |  204 |   */
 221 |  205 |  bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
  .. |   .. |
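
Because @fw_version and @srv_version are passed in decreasing order, the negotiation can stop at the first entry the host also offers. A minimal, self-contained sketch of that selection logic (illustrative names only, not the kernel implementation):

    #include <stdbool.h>
    #include <stdint.h>

    /* Pick the highest mutually supported version. 'ours' mirrors
     * fw_version/srv_version (decreasing order); 'host' stands in for
     * the versions carried by the negotiate payload. */
    static bool pick_highest_common(const uint32_t *ours, int ours_cnt,
                                    const uint32_t *host, int host_cnt,
                                    uint32_t *nego)
    {
        for (int i = 0; i < ours_cnt; i++)      /* highest first */
            for (int j = 0; j < host_cnt; j++)
                if (ours[i] == host[j]) {
                    *nego = ours[i];
                    return true;                /* first hit is the best */
                }
        return false;   /* no common version: negotiation fails */
    }
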
---|
 332 |  316 |      if (!channel)
 333 |  317 |          return NULL;
 334 |  318 | 
 335 |      | -    spin_lock_init(&channel->lock);
     |  319 | +    spin_lock_init(&channel->sched_lock);
 336 |  320 |      init_completion(&channel->rescind_event);
 337 |  321 | 
 338 |  322 |      INIT_LIST_HEAD(&channel->sc_list);
 339 |      | -    INIT_LIST_HEAD(&channel->percpu_list);
 340 |  323 | 
 341 |  324 |      tasklet_init(&channel->callback_event,
 342 |  325 |               vmbus_on_event, (unsigned long)channel);
     |  326 | +
     |  327 | +    hv_ringbuffer_pre_init(channel);
 343 |  328 | 
 344 |  329 |      return channel;
 345 |  330 |  }
  .. |   .. |
 355 |  340 |      kobject_put(&channel->kobj);
 356 |  341 |  }
 357 |  342 | 
 358 |      | -static void percpu_channel_enq(void *arg)
     |  343 | +void vmbus_channel_map_relid(struct vmbus_channel *channel)
 359 |  344 |  {
 360 |      | -    struct vmbus_channel *channel = arg;
 361 |      | -    struct hv_per_cpu_context *hv_cpu
 362 |      | -        = this_cpu_ptr(hv_context.cpu_context);
 363 |      | -
 364 |      | -    list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
     |  345 | +    if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
     |  346 | +        return;
     |  347 | +    /*
     |  348 | +     * The mapping of the channel's relid is visible from the CPUs that
     |  349 | +     * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
     |  350 | +     * execute:
     |  351 | +     *
     |  352 | +     * (a) In the "normal (i.e., not resuming from hibernation)" path,
     |  353 | +     *     the full barrier in virt_store_mb() guarantees that the store
     |  354 | +     *     is propagated to all CPUs before the add_channel_work work
     |  355 | +     *     is queued. In turn, add_channel_work is queued before the
     |  356 | +     *     channel's ring buffer is allocated/initialized and the
     |  357 | +     *     OPENCHANNEL message for the channel is sent in vmbus_open().
     |  358 | +     *     Hyper-V won't start sending the interrupts for the channel
     |  359 | +     *     before the OPENCHANNEL message is acked. The memory barrier
     |  360 | +     *     in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
     |  361 | +     *     that vmbus_chan_sched() must find the channel's relid in
     |  362 | +     *     recv_int_page before retrieving the channel pointer from the
     |  363 | +     *     array of channels.
     |  364 | +     *
     |  365 | +     * (b) In the "resuming from hibernation" path, the virt_store_mb()
     |  366 | +     *     guarantees that the store is propagated to all CPUs before
     |  367 | +     *     the VMBus connection is marked as ready for the resume event
     |  368 | +     *     (cf. check_ready_for_resume_event()). The interrupt handler
     |  369 | +     *     of the VMBus driver and vmbus_chan_sched() can not run before
     |  370 | +     *     vmbus_bus_resume() has completed execution (cf. resume_noirq).
     |  371 | +     */
     |  372 | +    virt_store_mb(
     |  373 | +        vmbus_connection.channels[channel->offermsg.child_relid],
     |  374 | +        channel);
 365 |  375 |  }
 366 |  376 | 
 367 |      | -static void percpu_channel_deq(void *arg)
     |  377 | +void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
 368 |  378 |  {
 369 |      | -    struct vmbus_channel *channel = arg;
 370 |      | -
 371 |      | -    list_del_rcu(&channel->percpu_list);
     |  379 | +    if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
     |  380 | +        return;
     |  381 | +    WRITE_ONCE(
     |  382 | +        vmbus_connection.channels[channel->offermsg.child_relid],
     |  383 | +        NULL);
 372 |  384 |  }
 373 |      | -
 374 |  385 | 
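
The ordering argument above pairs with the consumer side in vmbus_chan_sched(). A condensed, illustrative sketch of that reader (not a verbatim excerpt of the kernel function):

    /* The relid bit is cleared with a full barrier before the channel
     * pointer is loaded, so a CPU that observes the bit set also
     * observes the mapping published by virt_store_mb() above. */
    for_each_set_bit(relid, recv_int_page, MAX_CHANNEL_RELIDS) {
        if (!sync_test_and_clear_bit(relid, recv_int_page))
            continue;

        channel = READ_ONCE(vmbus_connection.channels[relid]);
        if (channel == NULL)
            continue;       /* relid not (or no longer) mapped */

        /* ... deliver the interrupt to the channel ... */
    }
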
---|
 375 |  386 |  static void vmbus_release_relid(u32 relid)
 376 |  387 |  {
  .. |   .. |
 386 |  397 |      trace_vmbus_release_relid(&msg, ret);
 387 |  398 |  }
 388 |  399 | 
 389 |      | -void hv_process_channel_removal(u32 relid)
     |  400 | +void hv_process_channel_removal(struct vmbus_channel *channel)
 390 |  401 |  {
 391 |      | -    unsigned long flags;
 392 |      | -    struct vmbus_channel *primary_channel, *channel;
 393 |      | -
 394 |      | -    BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 395 |      | -
 396 |      | -    /*
 397 |      | -     * Make sure channel is valid as we may have raced.
 398 |      | -     */
 399 |      | -    channel = relid2channel(relid);
 400 |      | -    if (!channel)
 401 |      | -        return;
 402 |      | -
     |  402 | +    lockdep_assert_held(&vmbus_connection.channel_mutex);
 403 |  403 |      BUG_ON(!channel->rescind);
 404 |      | -    if (channel->target_cpu != get_cpu()) {
 405 |      | -        put_cpu();
 406 |      | -        smp_call_function_single(channel->target_cpu,
 407 |      | -                percpu_channel_deq, channel, true);
 408 |      | -    } else {
 409 |      | -        percpu_channel_deq(channel);
 410 |      | -        put_cpu();
 411 |      | -    }
 412 |      | -
 413 |      | -    if (channel->primary_channel == NULL) {
 414 |      | -        list_del(&channel->listentry);
 415 |      | -
 416 |      | -        primary_channel = channel;
 417 |      | -    } else {
 418 |      | -        primary_channel = channel->primary_channel;
 419 |      | -        spin_lock_irqsave(&primary_channel->lock, flags);
 420 |      | -        list_del(&channel->sc_list);
 421 |      | -        primary_channel->num_sc--;
 422 |      | -        spin_unlock_irqrestore(&primary_channel->lock, flags);
 423 |      | -    }
 424 |  404 | 
 425 |  405 |      /*
 426 |      | -     * We need to free the bit for init_vp_index() to work in the case
 427 |      | -     * of sub-channel, when we reload drivers like hv_netvsc.
     |  406 | +     * hv_process_channel_removal() could find INVALID_RELID only for
     |  407 | +     * hv_sock channels. See the inline comments in vmbus_onoffer().
 428 |  408 |      */
 429 |      | -    if (channel->affinity_policy == HV_LOCALIZED)
 430 |      | -        cpumask_clear_cpu(channel->target_cpu,
 431 |      | -                  &primary_channel->alloced_cpus_in_node);
     |  409 | +    WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
     |  410 | +        !is_hvsock_channel(channel));
 432 |  411 | 
 433 |      | -    vmbus_release_relid(relid);
     |  412 | +    /*
     |  413 | +     * Upon suspend, an in-use hv_sock channel is removed from the array of
     |  414 | +     * channels and the relid is invalidated. After hibernation, when the
     |  415 | +     * user-space application destroys the channel, it's unnecessary and
     |  416 | +     * unsafe to remove the channel from the array of channels. See also
     |  417 | +     * the inline comments before the call of vmbus_release_relid() below.
     |  418 | +     */
     |  419 | +    if (channel->offermsg.child_relid != INVALID_RELID)
     |  420 | +        vmbus_channel_unmap_relid(channel);
     |  421 | +
     |  422 | +    if (channel->primary_channel == NULL)
     |  423 | +        list_del(&channel->listentry);
     |  424 | +    else
     |  425 | +        list_del(&channel->sc_list);
     |  426 | +
     |  427 | +    /*
     |  428 | +     * If this is a "perf" channel, update the hv_numa_map[] masks so that
     |  429 | +     * init_vp_index() can (re-)use the CPU.
     |  430 | +     */
     |  431 | +    if (hv_is_perf_channel(channel))
     |  432 | +        hv_clear_alloced_cpu(channel->target_cpu);
     |  433 | +
     |  434 | +    /*
     |  435 | +     * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
     |  436 | +     * the relid is invalidated; after hibernation, when the user-space app
     |  437 | +     * destroys the channel, the relid is INVALID_RELID, and in this case
     |  438 | +     * it's unnecessary and unsafe to release the old relid, since the same
     |  439 | +     * relid can refer to a completely different channel now.
     |  440 | +     */
     |  441 | +    if (channel->offermsg.child_relid != INVALID_RELID)
     |  442 | +        vmbus_release_relid(channel->offermsg.child_relid);
 434 |  443 | 
 435 |  444 |      free_channel(channel);
 436 |  445 |  }
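
hv_is_perf_channel() and the alloced-CPU helpers used above come from hyperv_vmbus.h in the same series. Assuming they simply track per-node CPU usage in hv_context.hv_numa_map[], they would look roughly like:

    /* Sketch of the assumed helpers: record which CPUs already host a
     * "perf" channel, per NUMA node, so init_vp_index() round-robins
     * over the CPUs that are still free. */
    static inline void hv_set_alloced_cpu(unsigned int cpu)
    {
        cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
    }

    static inline void hv_clear_alloced_cpu(unsigned int cpu)
    {
        cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
    }
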
---|
  .. |   .. |
 454 |  463 |      struct vmbus_channel *newchannel =
 455 |  464 |          container_of(work, struct vmbus_channel, add_channel_work);
 456 |  465 |      struct vmbus_channel *primary_channel = newchannel->primary_channel;
 457 |      | -    unsigned long flags;
 458 |      | -    u16 dev_type;
 459 |  466 |      int ret;
 460 |      | -
 461 |      | -    dev_type = hv_get_dev_type(newchannel);
 462 |      | -
 463 |      | -    init_vp_index(newchannel, dev_type);
 464 |      | -
 465 |      | -    if (newchannel->target_cpu != get_cpu()) {
 466 |      | -        put_cpu();
 467 |      | -        smp_call_function_single(newchannel->target_cpu,
 468 |      | -                     percpu_channel_enq,
 469 |      | -                     newchannel, true);
 470 |      | -    } else {
 471 |      | -        percpu_channel_enq(newchannel);
 472 |      | -        put_cpu();
 473 |      | -    }
 474 |  467 | 
 475 |  468 |      /*
 476 |  469 |       * This state is used to indicate a successful open
  .. |   .. |
 503 |  496 |      if (!newchannel->device_obj)
 504 |  497 |          goto err_deq_chan;
 505 |  498 | 
 506 |      | -    newchannel->device_obj->device_id = dev_type;
     |  499 | +    newchannel->device_obj->device_id = newchannel->device_id;
 507 |  500 |      /*
 508 |  501 |       * Add the new device to the bus. This will kick off device-driver
 509 |  502 |       * binding which eventually invokes the device driver's AddDevice()
 510 |  503 |       * method.
     |  504 | +     *
     |  505 | +     * If vmbus_device_register() fails, the 'device_obj' is freed in
     |  506 | +     * vmbus_device_release() as called by device_unregister() in the
     |  507 | +     * error path of vmbus_device_register(). In the outside error
     |  508 | +     * path, there's no need to free it.
 511 |  509 |       */
 512 |  510 |      ret = vmbus_device_register(newchannel->device_obj);
 513 |  511 | 
 514 |  512 |      if (ret != 0) {
 515 |  513 |          pr_err("unable to add child device object (relid %d)\n",
 516 |  514 |              newchannel->offermsg.child_relid);
 517 |      | -        kfree(newchannel->device_obj);
 518 |  515 |          goto err_deq_chan;
 519 |  516 |      }
 520 |  517 | 
  .. |   .. |
 530 |  527 |       */
 531 |  528 |      newchannel->probe_done = true;
 532 |  529 | 
 533 |      | -    if (primary_channel == NULL) {
     |  530 | +    if (primary_channel == NULL)
 534 |  531 |          list_del(&newchannel->listentry);
 535 |      | -    } else {
 536 |      | -        spin_lock_irqsave(&primary_channel->lock, flags);
     |  532 | +    else
 537 |  533 |          list_del(&newchannel->sc_list);
 538 |      | -        spin_unlock_irqrestore(&primary_channel->lock, flags);
 539 |      | -    }
     |  534 | +
     |  535 | +    /* vmbus_process_offer() has mapped the channel. */
     |  536 | +    vmbus_channel_unmap_relid(newchannel);
 540 |  537 | 
 541 |  538 |      mutex_unlock(&vmbus_connection.channel_mutex);
 542 |      | -
 543 |      | -    if (newchannel->target_cpu != get_cpu()) {
 544 |      | -        put_cpu();
 545 |      | -        smp_call_function_single(newchannel->target_cpu,
 546 |      | -                     percpu_channel_deq,
 547 |      | -                     newchannel, true);
 548 |      | -    } else {
 549 |      | -        percpu_channel_deq(newchannel);
 550 |      | -        put_cpu();
 551 |      | -    }
 552 |  539 | 
---|
563 | 550 | { |
---|
564 | 551 | struct vmbus_channel *channel; |
---|
565 | 552 | struct workqueue_struct *wq; |
---|
566 | | - unsigned long flags; |
---|
567 | 553 | bool fnew = true; |
---|
568 | 554 | |
---|
| 555 | + /* |
---|
| 556 | + * Synchronize vmbus_process_offer() and CPU hotplugging: |
---|
| 557 | + * |
---|
| 558 | + * CPU1 CPU2 |
---|
| 559 | + * |
---|
| 560 | + * [vmbus_process_offer()] [Hot removal of the CPU] |
---|
| 561 | + * |
---|
| 562 | + * CPU_READ_LOCK CPUS_WRITE_LOCK |
---|
| 563 | + * LOAD cpu_online_mask SEARCH chn_list |
---|
| 564 | + * STORE target_cpu LOAD target_cpu |
---|
| 565 | + * INSERT chn_list STORE cpu_online_mask |
---|
| 566 | + * CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK |
---|
| 567 | + * |
---|
| 568 | + * Forbids: CPU1's LOAD from *not* seing CPU2's STORE && |
---|
| 569 | + * CPU2's SEARCH from *not* seeing CPU1's INSERT |
---|
| 570 | + * |
---|
| 571 | + * Forbids: CPU2's SEARCH from seeing CPU1's INSERT && |
---|
| 572 | + * CPU2's LOAD from *not* seing CPU1's STORE |
---|
| 573 | + */ |
---|
| 574 | + cpus_read_lock(); |
---|
| 575 | + |
---|
| 576 | + /* |
---|
| 577 | + * Serializes the modifications of the chn_list list as well as |
---|
| 578 | + * the accesses to next_numa_node_id in init_vp_index(). |
---|
| 579 | + */ |
---|
569 | 580 | mutex_lock(&vmbus_connection.channel_mutex); |
---|
| 581 | + |
---|
| 582 | + init_vp_index(newchannel); |
---|
| 583 | + |
---|
| 584 | + /* Remember the channels that should be cleaned up upon suspend. */ |
---|
| 585 | + if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel)) |
---|
| 586 | + atomic_inc(&vmbus_connection.nr_chan_close_on_suspend); |
---|
570 | 587 | |
---|
571 | 588 | /* |
---|
572 | 589 | * Now that we have acquired the channel_mutex, |
---|
.. | .. |
---|
575 | 592 | atomic_dec(&vmbus_connection.offer_in_progress); |
---|
576 | 593 | |
---|
577 | 594 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { |
---|
578 | | - if (!uuid_le_cmp(channel->offermsg.offer.if_type, |
---|
579 | | - newchannel->offermsg.offer.if_type) && |
---|
580 | | - !uuid_le_cmp(channel->offermsg.offer.if_instance, |
---|
581 | | - newchannel->offermsg.offer.if_instance)) { |
---|
| 595 | + if (guid_equal(&channel->offermsg.offer.if_type, |
---|
| 596 | + &newchannel->offermsg.offer.if_type) && |
---|
| 597 | + guid_equal(&channel->offermsg.offer.if_instance, |
---|
| 598 | + &newchannel->offermsg.offer.if_instance)) { |
---|
582 | 599 | fnew = false; |
---|
583 | 600 | break; |
---|
584 | 601 | } |
---|
585 | 602 | } |
---|
586 | 603 | |
---|
587 | | - if (fnew) |
---|
| 604 | + if (fnew) { |
---|
588 | 605 | list_add_tail(&newchannel->listentry, |
---|
589 | 606 | &vmbus_connection.chn_list); |
---|
590 | | - else { |
---|
| 607 | + } else { |
---|
591 | 608 | /* |
---|
592 | 609 | * Check to see if this is a valid sub-channel. |
---|
593 | 610 | */ |
---|
594 | 611 | if (newchannel->offermsg.offer.sub_channel_index == 0) { |
---|
595 | 612 | mutex_unlock(&vmbus_connection.channel_mutex); |
---|
| 613 | + cpus_read_unlock(); |
---|
596 | 614 | /* |
---|
597 | 615 | * Don't call free_channel(), because newchannel->kobj |
---|
598 | 616 | * is not initialized yet. |
---|
.. | .. |
---|
605 | 623 | * Process the sub-channel. |
---|
606 | 624 | */ |
---|
607 | 625 | newchannel->primary_channel = channel; |
---|
608 | | - spin_lock_irqsave(&channel->lock, flags); |
---|
609 | 626 | list_add_tail(&newchannel->sc_list, &channel->sc_list); |
---|
610 | | - spin_unlock_irqrestore(&channel->lock, flags); |
---|
611 | 627 | } |
---|
612 | 628 | |
---|
| 629 | + vmbus_channel_map_relid(newchannel); |
---|
| 630 | + |
---|
613 | 631 | mutex_unlock(&vmbus_connection.channel_mutex); |
---|
| 632 | + cpus_read_unlock(); |
---|
614 | 633 | |
---|
615 | 634 | /* |
---|
616 | 635 | * vmbus_process_offer() mustn't call channel->sc_creation_callback() |
---|
.. | .. |
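
The "Hot removal of the CPU" column in the diagram corresponds to the SyNIC teardown path. A hedged sketch of that side, assuming it mirrors the hv_synic_cleanup() of this era, which runs under the hotplug core's cpus_write_lock() and vetoes offlining a CPU that still has a channel bound to it (illustrative only):

    /* The SEARCH/LOAD column of the diagram above. */
    static int synic_cleanup_sketch(unsigned int cpu)
    {
        struct vmbus_channel *channel, *sc;
        bool channel_found = false;

        mutex_lock(&vmbus_connection.channel_mutex);
        list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
            if (channel->target_cpu == cpu) {
                channel_found = true;
                break;
            }
            list_for_each_entry(sc, &channel->sc_list, sc_list) {
                if (sc->target_cpu == cpu) {
                    channel_found = true;
                    break;
                }
            }
            if (channel_found)
                break;
        }
        mutex_unlock(&vmbus_connection.channel_mutex);

        if (channel_found)
            return -EBUSY;  /* veto the hot removal */

        /* ... disable the SyNIC on this CPU ... */
        return 0;
    }
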
---|
 643 |  662 |   * We use this state to statically distribute the channel interrupt load.
 644 |  663 |   */
 645 |  664 |  static int next_numa_node_id;
 646 |      | -/*
 647 |      | - * init_vp_index() accesses global variables like next_numa_node_id, and
 648 |      | - * it can run concurrently for primary channels and sub-channels: see
 649 |      | - * vmbus_process_offer(), so we need the lock to protect the global
 650 |      | - * variables.
 651 |      | - */
 652 |      | -static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
 653 |  665 | 
 654 |  666 |  /*
 655 |  667 |   * Starting with Win8, we can statically distribute the incoming
 656 |  668 |   * channel interrupt load by binding a channel to VCPU.
 657 |      | - * We distribute the interrupt loads to one or more NUMA nodes based on
 658 |      | - * the channel's affinity_policy.
 659 |  669 |   *
 660 |  670 |   * For pre-win8 hosts or non-performance critical channels we assign the
 661 |      | - * first CPU in the first NUMA node.
     |  671 | + * VMBUS_CONNECT_CPU.
     |  672 | + *
     |  673 | + * Starting with win8, performance critical channels will be distributed
     |  674 | + * evenly among all the available NUMA nodes. Once the node is assigned,
     |  675 | + * we will assign the CPU based on a simple round robin scheme.
 662 |  676 |   */
 663 |      | -static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
     |  677 | +static void init_vp_index(struct vmbus_channel *channel)
 664 |  678 |  {
 665 |      | -    u32 cur_cpu;
 666 |      | -    bool perf_chn = vmbus_devs[dev_type].perf_device;
 667 |      | -    struct vmbus_channel *primary = channel->primary_channel;
 668 |      | -    int next_node;
     |  679 | +    bool perf_chn = hv_is_perf_channel(channel);
 669 |  680 |      cpumask_var_t available_mask;
 670 |  681 |      struct cpumask *alloced_mask;
     |  682 | +    u32 target_cpu;
     |  683 | +    int numa_node;
 671 |  684 | 
 672 |  685 |      if ((vmbus_proto_version == VERSION_WS2008) ||
 673 |  686 |          (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
 674 |  687 |          !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
 675 |  688 |          /*
 676 |  689 |           * Prior to win8, all channel interrupts are
 677 |      | -         * delivered on cpu 0.
     |  690 | +         * delivered on VMBUS_CONNECT_CPU.
 678 |  691 |           * Also if the channel is not a performance critical
 679 |      | -         * channel, bind it to cpu 0.
 680 |      | -         * In case alloc_cpumask_var() fails, bind it to cpu 0.
     |  692 | +         * channel, bind it to VMBUS_CONNECT_CPU.
     |  693 | +         * In case alloc_cpumask_var() fails, bind it to
     |  694 | +         * VMBUS_CONNECT_CPU.
 681 |  695 |           */
 682 |      | -        channel->numa_node = 0;
 683 |      | -        channel->target_cpu = 0;
 684 |      | -        channel->target_vp = hv_cpu_number_to_vp_number(0);
     |  696 | +        channel->target_cpu = VMBUS_CONNECT_CPU;
     |  697 | +        if (perf_chn)
     |  698 | +            hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
 685 |  699 |          return;
 686 |  700 |      }
 687 |  701 | 
 688 |      | -    spin_lock(&bind_channel_to_cpu_lock);
 689 |      | -
 690 |      | -    /*
 691 |      | -     * Based on the channel affinity policy, we will assign the NUMA
 692 |      | -     * nodes.
 693 |      | -     */
 694 |      | -
 695 |      | -    if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 696 |      | -        while (true) {
 697 |      | -            next_node = next_numa_node_id++;
 698 |      | -            if (next_node == nr_node_ids) {
 699 |      | -                next_node = next_numa_node_id = 0;
 700 |      | -                continue;
 701 |      | -            }
 702 |      | -            if (cpumask_empty(cpumask_of_node(next_node)))
 703 |      | -                continue;
 704 |      | -            break;
     |  702 | +    while (true) {
     |  703 | +        numa_node = next_numa_node_id++;
     |  704 | +        if (numa_node == nr_node_ids) {
     |  705 | +            next_numa_node_id = 0;
     |  706 | +            continue;
 705 |  707 |          }
 706 |      | -        channel->numa_node = next_node;
 707 |      | -        primary = channel;
     |  708 | +        if (cpumask_empty(cpumask_of_node(numa_node)))
     |  709 | +            continue;
     |  710 | +        break;
 708 |  711 |      }
 709 |      | -    alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
     |  712 | +    alloced_mask = &hv_context.hv_numa_map[numa_node];
 710 |  713 | 
 711 |  714 |      if (cpumask_weight(alloced_mask) ==
 712 |      | -        cpumask_weight(cpumask_of_node(primary->numa_node))) {
     |  715 | +        cpumask_weight(cpumask_of_node(numa_node))) {
 713 |  716 |          /*
 714 |  717 |           * We have cycled through all the CPUs in the node;
 715 |  718 |           * reset the alloced map.
  .. |   .. |
 717 |  720 |          cpumask_clear(alloced_mask);
 718 |  721 |      }
 719 |  722 | 
 720 |      | -    cpumask_xor(available_mask, alloced_mask,
 721 |      | -            cpumask_of_node(primary->numa_node));
     |  723 | +    cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
 722 |  724 | 
 723 |      | -    cur_cpu = -1;
     |  725 | +    target_cpu = cpumask_first(available_mask);
     |  726 | +    cpumask_set_cpu(target_cpu, alloced_mask);
 724 |  727 | 
 725 |      | -    if (primary->affinity_policy == HV_LOCALIZED) {
 726 |      | -        /*
 727 |      | -         * Normally Hyper-V host doesn't create more subchannels
 728 |      | -         * than there are VCPUs on the node but it is possible when not
 729 |      | -         * all present VCPUs on the node are initialized by guest.
 730 |      | -         * Clear the alloced_cpus_in_node to start over.
 731 |      | -         */
 732 |      | -        if (cpumask_equal(&primary->alloced_cpus_in_node,
 733 |      | -                  cpumask_of_node(primary->numa_node)))
 734 |      | -            cpumask_clear(&primary->alloced_cpus_in_node);
 735 |      | -    }
 736 |      | -
 737 |      | -    while (true) {
 738 |      | -        cur_cpu = cpumask_next(cur_cpu, available_mask);
 739 |      | -        if (cur_cpu >= nr_cpu_ids) {
 740 |      | -            cur_cpu = -1;
 741 |      | -            cpumask_copy(available_mask,
 742 |      | -                     cpumask_of_node(primary->numa_node));
 743 |      | -            continue;
 744 |      | -        }
 745 |      | -
 746 |      | -        if (primary->affinity_policy == HV_LOCALIZED) {
 747 |      | -            /*
 748 |      | -             * NOTE: in the case of sub-channel, we clear the
 749 |      | -             * sub-channel related bit(s) in
 750 |      | -             * primary->alloced_cpus_in_node in
 751 |      | -             * hv_process_channel_removal(), so when we
 752 |      | -             * reload drivers like hv_netvsc in SMP guest, here
 753 |      | -             * we're able to re-allocate
 754 |      | -             * bit from primary->alloced_cpus_in_node.
 755 |      | -             */
 756 |      | -            if (!cpumask_test_cpu(cur_cpu,
 757 |      | -                    &primary->alloced_cpus_in_node)) {
 758 |      | -                cpumask_set_cpu(cur_cpu,
 759 |      | -                        &primary->alloced_cpus_in_node);
 760 |      | -                cpumask_set_cpu(cur_cpu, alloced_mask);
 761 |      | -                break;
 762 |      | -            }
 763 |      | -        } else {
 764 |      | -            cpumask_set_cpu(cur_cpu, alloced_mask);
 765 |      | -            break;
 766 |      | -        }
 767 |      | -    }
 768 |      | -
 769 |      | -    channel->target_cpu = cur_cpu;
 770 |      | -    channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
 771 |      | -
 772 |      | -    spin_unlock(&bind_channel_to_cpu_lock);
     |  728 | +    channel->target_cpu = target_cpu;
 773 |  729 | 
 774 |  730 |      free_cpumask_var(available_mask);
 775 |  731 |  }
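
As a worked example of the new round robin: on a guest with two NUMA nodes of four CPUs each (node 0: CPUs 0-3, node 1: CPUs 4-7), successive perf channels land on CPUs 0, 4, 1, 5, 2, 6, 3, 7; the ninth channel wraps back to CPU 0 once node 0's alloced mask is full and gets reset.
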
---|
  .. |   .. |
 809 |  765 |      if (completion_done(&vmbus_connection.unload_event))
 810 |  766 |          goto completed;
 811 |  767 | 
 812 |      | -    for_each_online_cpu(cpu) {
     |  768 | +    for_each_present_cpu(cpu) {
 813 |  769 |          struct hv_per_cpu_context *hv_cpu
 814 |  770 |              = per_cpu_ptr(hv_context.cpu_context, cpu);
 815 |  771 | 
     |  772 | +        /*
     |  773 | +         * In a CoCo VM the synic_message_page is not allocated
     |  774 | +         * in hv_synic_alloc(). Instead it is set/cleared in
     |  775 | +         * hv_synic_enable_regs() and hv_synic_disable_regs()
     |  776 | +         * such that it is set only when the CPU is online. If
     |  777 | +         * not all present CPUs are online, the message page
     |  778 | +         * might be NULL, so skip such CPUs.
     |  779 | +         */
 816 |  780 |          page_addr = hv_cpu->synic_message_page;
     |  781 | +        if (!page_addr)
     |  782 | +            continue;
     |  783 | +
 817 |  784 |          msg = (struct hv_message *)page_addr
 818 |  785 |              + VMBUS_MESSAGE_SINT;
 819 |  786 | 
  .. |   .. |
 847 |  814 |       * maybe-pending messages on all CPUs to be able to receive new
 848 |  815 |       * messages after we reconnect.
 849 |  816 |       */
 850 |      | -    for_each_online_cpu(cpu) {
     |  817 | +    for_each_present_cpu(cpu) {
 851 |  818 |          struct hv_per_cpu_context *hv_cpu
 852 |  819 |              = per_cpu_ptr(hv_context.cpu_context, cpu);
 853 |  820 | 
 854 |  821 |          page_addr = hv_cpu->synic_message_page;
     |  822 | +        if (!page_addr)
     |  823 | +            continue;
     |  824 | +
 855 |  825 |          msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
 856 |  826 |          msg->header.message_type = HVMSG_NONE;
 857 |  827 |      }
  .. |   .. |
 896 |  866 |      vmbus_wait_for_unload();
 897 |  867 |  }
 898 |  868 | 
     |  869 | +static void check_ready_for_resume_event(void)
     |  870 | +{
     |  871 | +    /*
     |  872 | +     * If all the old primary channels have been fixed up, then it's safe
     |  873 | +     * to resume.
     |  874 | +     */
     |  875 | +    if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
     |  876 | +        complete(&vmbus_connection.ready_for_resume_event);
     |  877 | +}
     |  878 | +
     |  879 | +static void vmbus_setup_channel_state(struct vmbus_channel *channel,
     |  880 | +                      struct vmbus_channel_offer_channel *offer)
     |  881 | +{
     |  882 | +    /*
     |  883 | +     * Setup state for signalling the host.
     |  884 | +     */
     |  885 | +    channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
     |  886 | +
     |  887 | +    if (vmbus_proto_version != VERSION_WS2008) {
     |  888 | +        channel->is_dedicated_interrupt =
     |  889 | +            (offer->is_dedicated_interrupt != 0);
     |  890 | +        channel->sig_event = offer->connection_id;
     |  891 | +    }
     |  892 | +
     |  893 | +    memcpy(&channel->offermsg, offer,
     |  894 | +           sizeof(struct vmbus_channel_offer_channel));
     |  895 | +    channel->monitor_grp = (u8)offer->monitorid / 32;
     |  896 | +    channel->monitor_bit = (u8)offer->monitorid % 32;
     |  897 | +    channel->device_id = hv_get_dev_type(channel);
     |  898 | +}
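
A worked example of the monitor-ID split in vmbus_setup_channel_state(): monitor IDs index 32-bit trigger groups, so e.g. monitorid 70 maps to monitor_grp 70 / 32 = 2 at monitor_bit 70 % 32 = 6.
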
---|
     |  899 | +
     |  900 | +/*
     |  901 | + * find_primary_channel_by_offer - Get the channel object given the new offer.
     |  902 | + * This is only used in the resume path of hibernation.
     |  903 | + */
     |  904 | +static struct vmbus_channel *
     |  905 | +find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
     |  906 | +{
     |  907 | +    struct vmbus_channel *channel = NULL, *iter;
     |  908 | +    const guid_t *inst1, *inst2;
     |  909 | +
     |  910 | +    /* Ignore sub-channel offers. */
     |  911 | +    if (offer->offer.sub_channel_index != 0)
     |  912 | +        return NULL;
     |  913 | +
     |  914 | +    mutex_lock(&vmbus_connection.channel_mutex);
     |  915 | +
     |  916 | +    list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
     |  917 | +        inst1 = &iter->offermsg.offer.if_instance;
     |  918 | +        inst2 = &offer->offer.if_instance;
     |  919 | +
     |  920 | +        if (guid_equal(inst1, inst2)) {
     |  921 | +            channel = iter;
     |  922 | +            break;
     |  923 | +        }
     |  924 | +    }
     |  925 | +
     |  926 | +    mutex_unlock(&vmbus_connection.channel_mutex);
     |  927 | +
     |  928 | +    return channel;
     |  929 | +}
     |  930 | +
 899 |  931 |  /*
 900 |  932 |   * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 901 |  933 |   *
  .. |   .. |
 903 |  935 |  static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 904 |  936 |  {
 905 |  937 |      struct vmbus_channel_offer_channel *offer;
 906 |      | -    struct vmbus_channel *newchannel;
     |  938 | +    struct vmbus_channel *oldchannel, *newchannel;
     |  939 | +    size_t offer_sz;
 907 |  940 | 
 908 |  941 |      offer = (struct vmbus_channel_offer_channel *)hdr;
 909 |  942 | 
 910 |  943 |      trace_vmbus_onoffer(offer);
     |  944 | +
     |  945 | +    oldchannel = find_primary_channel_by_offer(offer);
     |  946 | +
     |  947 | +    if (oldchannel != NULL) {
     |  948 | +        /*
     |  949 | +         * We're resuming from hibernation: all the sub-channel and
     |  950 | +         * hv_sock channels we had before the hibernation should have
     |  951 | +         * been cleaned up, and now we must be seeing a re-offered
     |  952 | +         * primary channel that we had before the hibernation.
     |  953 | +         */
     |  954 | +
     |  955 | +        /*
     |  956 | +         * { Initially: channel relid = INVALID_RELID,
     |  957 | +         *              channels[valid_relid] = NULL }
     |  958 | +         *
     |  959 | +         * CPU1                                 CPU2
     |  960 | +         *
     |  961 | +         * [vmbus_onoffer()]                    [vmbus_device_release()]
     |  962 | +         *
     |  963 | +         * LOCK channel_mutex                   LOCK channel_mutex
     |  964 | +         * STORE channel relid = valid_relid   LOAD r1 = channel relid
     |  965 | +         * MAP_RELID channel                    if (r1 != INVALID_RELID)
     |  966 | +         * UNLOCK channel_mutex                   UNMAP_RELID channel
     |  967 | +         *                                      UNLOCK channel_mutex
     |  968 | +         *
     |  969 | +         * Forbids: r1 == valid_relid &&
     |  970 | +         *          channels[valid_relid] == channel
     |  971 | +         *
     |  972 | +         * Note. r1 can be INVALID_RELID only for an hv_sock channel.
     |  973 | +         * None of the hv_sock channels which were present before the
     |  974 | +         * suspend are re-offered upon the resume. See the WARN_ON()
     |  975 | +         * in hv_process_channel_removal().
     |  976 | +         */
     |  977 | +        mutex_lock(&vmbus_connection.channel_mutex);
     |  978 | +
     |  979 | +        atomic_dec(&vmbus_connection.offer_in_progress);
     |  980 | +
     |  981 | +        WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
     |  982 | +        /* Fix up the relid. */
     |  983 | +        oldchannel->offermsg.child_relid = offer->child_relid;
     |  984 | +
     |  985 | +        offer_sz = sizeof(*offer);
     |  986 | +        if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
     |  987 | +            /*
     |  988 | +             * This is not an error, since the host can also change
     |  989 | +             * the other field(s) of the offer, e.g. on WS RS5
     |  990 | +             * (Build 17763), the offer->connection_id of the
     |  991 | +             * Mellanox VF vmbus device can change when the host
     |  992 | +             * reoffers the device upon resume.
     |  993 | +             */
     |  994 | +            pr_debug("vmbus offer changed: relid=%d\n",
     |  995 | +                 offer->child_relid);
     |  996 | +
     |  997 | +            print_hex_dump_debug("Old vmbus offer: ",
     |  998 | +                         DUMP_PREFIX_OFFSET, 16, 4,
     |  999 | +                         &oldchannel->offermsg, offer_sz,
     | 1000 | +                         false);
     | 1001 | +            print_hex_dump_debug("New vmbus offer: ",
     | 1002 | +                         DUMP_PREFIX_OFFSET, 16, 4,
     | 1003 | +                         offer, offer_sz, false);
     | 1004 | +
     | 1005 | +            /* Fix up the old channel. */
     | 1006 | +            vmbus_setup_channel_state(oldchannel, offer);
     | 1007 | +        }
     | 1008 | +
     | 1009 | +        /* Add the channel back to the array of channels. */
     | 1010 | +        vmbus_channel_map_relid(oldchannel);
     | 1011 | +        check_ready_for_resume_event();
     | 1012 | +
     | 1013 | +        mutex_unlock(&vmbus_connection.channel_mutex);
     | 1014 | +        return;
     | 1015 | +    }
---|
 911 | 1016 | 
 912 | 1017 |      /* Allocate the channel object and save this offer. */
 913 | 1018 |      newchannel = alloc_channel();
  .. |   .. |
 918 | 1023 |          return;
 919 | 1024 |      }
 920 | 1025 | 
 921 |      | -    /*
 922 |      | -     * Setup state for signalling the host.
 923 |      | -     */
 924 |      | -    newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
 925 |      | -
 926 |      | -    if (vmbus_proto_version != VERSION_WS2008) {
 927 |      | -        newchannel->is_dedicated_interrupt =
 928 |      | -            (offer->is_dedicated_interrupt != 0);
 929 |      | -        newchannel->sig_event = offer->connection_id;
 930 |      | -    }
 931 |      | -
 932 |      | -    memcpy(&newchannel->offermsg, offer,
 933 |      | -           sizeof(struct vmbus_channel_offer_channel));
 934 |      | -    newchannel->monitor_grp = (u8)offer->monitorid / 32;
 935 |      | -    newchannel->monitor_bit = (u8)offer->monitorid % 32;
     | 1026 | +    vmbus_setup_channel_state(newchannel, offer);
 936 | 1027 | 
 937 | 1028 |      vmbus_process_offer(newchannel);
     | 1029 | +}
     | 1030 | +
     | 1031 | +static void check_ready_for_suspend_event(void)
     | 1032 | +{
     | 1033 | +    /*
     | 1034 | +     * If all the sub-channels or hv_sock channels have been cleaned up,
     | 1035 | +     * then it's safe to suspend.
     | 1036 | +     */
     | 1037 | +    if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
     | 1038 | +        complete(&vmbus_connection.ready_for_suspend_event);
 938 | 1039 |  }
 939 | 1040 | 
 940 | 1041 |  /*
  .. |   .. |
 947 | 1048 |      struct vmbus_channel_rescind_offer *rescind;
 948 | 1049 |      struct vmbus_channel *channel;
 949 | 1050 |      struct device *dev;
     | 1051 | +    bool clean_up_chan_for_suspend;
 950 | 1052 | 
 951 | 1053 |      rescind = (struct vmbus_channel_rescind_offer *)hdr;
 952 | 1054 | 
  .. |   .. |
 958 | 1060 |       * offer comes in first and then the rescind.
 959 | 1061 |       * Since we process these events in work elements,
 960 | 1062 |       * and with preemption, we may end up processing
 961 |      | -     * the events out of order. Given that we handle these
 962 |      | -     * work elements on the same CPU, this is possible only
 963 |      | -     * in the case of preemption. In any case wait here
 964 |      | -     * until the offer processing has moved beyond the
 965 |      | -     * point where the channel is discoverable.
     | 1063 | +     * the events out of order. We rely on the synchronization
     | 1064 | +     * provided by offer_in_progress and by channel_mutex for
     | 1065 | +     * ordering these events:
     | 1066 | +     *
     | 1067 | +     * { Initially: offer_in_progress = 1 }
     | 1068 | +     *
     | 1069 | +     * CPU1                             CPU2
     | 1070 | +     *
     | 1071 | +     * [vmbus_onoffer()]                [vmbus_onoffer_rescind()]
     | 1072 | +     *
     | 1073 | +     * LOCK channel_mutex               WAIT_ON offer_in_progress == 0
     | 1074 | +     * DECREMENT offer_in_progress      LOCK channel_mutex
     | 1075 | +     * STORE channels[]                 LOAD channels[]
     | 1076 | +     * UNLOCK channel_mutex             UNLOCK channel_mutex
     | 1077 | +     *
     | 1078 | +     * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
 966 | 1079 |       */
 967 | 1080 | 
 968 | 1081 |      while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
  .. |   .. |
 986 | 1099 |          return;
 987 | 1100 |      }
 988 | 1101 | 
     | 1102 | +    clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
     | 1103 | +                    is_sub_channel(channel);
 989 | 1104 |      /*
 990 | 1105 |       * Before setting channel->rescind in vmbus_rescind_cleanup(), we
 991 | 1106 |       * should make sure the channel callback is not running any more.
  .. |   .. |
1011 | 1126 |      if (channel->device_obj) {
1012 | 1127 |          if (channel->chn_rescind_callback) {
1013 | 1128 |              channel->chn_rescind_callback(channel);
     | 1129 | +
     | 1130 | +            if (clean_up_chan_for_suspend)
     | 1131 | +                check_ready_for_suspend_event();
     | 1132 | +
1014 | 1133 |              return;
1015 | 1134 |          }
1016 | 1135 |          /*
  .. |   .. |
1036 | 1155 |           * The channel is currently not open;
1037 | 1156 |           * it is safe for us to cleanup the channel.
1038 | 1157 |           */
1039 |      | -        hv_process_channel_removal(rescind->child_relid);
     | 1158 | +        hv_process_channel_removal(channel);
1040 | 1159 |      } else {
1041 | 1160 |          complete(&channel->rescind_event);
1042 | 1161 |      }
1043 | 1162 |      mutex_unlock(&vmbus_connection.channel_mutex);
1044 | 1163 |      }
     | 1164 | +
     | 1165 | +    /* The "channel" may have been freed. Do not access it any longer. */
     | 1166 | +
     | 1167 | +    if (clean_up_chan_for_suspend)
     | 1168 | +        check_ready_for_suspend_event();
1045 | 1169 |  }
1046 | 1170 | 
---|
1047 | 1171 |  void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
  .. |   .. |
1250 | 1374 |  /* Channel message dispatch table */
1251 | 1375 |  const struct vmbus_channel_message_table_entry
1252 | 1376 |  channel_message_table[CHANNELMSG_COUNT] = {
1253 |      | -    { CHANNELMSG_INVALID, 0, NULL },
1254 |      | -    { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer },
1255 |      | -    { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind },
1256 |      | -    { CHANNELMSG_REQUESTOFFERS, 0, NULL },
1257 |      | -    { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered },
1258 |      | -    { CHANNELMSG_OPENCHANNEL, 0, NULL },
1259 |      | -    { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result },
1260 |      | -    { CHANNELMSG_CLOSECHANNEL, 0, NULL },
1261 |      | -    { CHANNELMSG_GPADL_HEADER, 0, NULL },
1262 |      | -    { CHANNELMSG_GPADL_BODY, 0, NULL },
1263 |      | -    { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created },
1264 |      | -    { CHANNELMSG_GPADL_TEARDOWN, 0, NULL },
1265 |      | -    { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown },
1266 |      | -    { CHANNELMSG_RELID_RELEASED, 0, NULL },
1267 |      | -    { CHANNELMSG_INITIATE_CONTACT, 0, NULL },
1268 |      | -    { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response },
1269 |      | -    { CHANNELMSG_UNLOAD, 0, NULL },
1270 |      | -    { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response },
1271 |      | -    { CHANNELMSG_18, 0, NULL },
1272 |      | -    { CHANNELMSG_19, 0, NULL },
1273 |      | -    { CHANNELMSG_20, 0, NULL },
1274 |      | -    { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL },
1275 |      | -    { CHANNELMSG_22, 0, NULL },
1276 |      | -    { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL },
     | 1377 | +    { CHANNELMSG_INVALID, 0, NULL, 0},
     | 1378 | +    { CHANNELMSG_OFFERCHANNEL, 0, vmbus_onoffer,
     | 1379 | +        sizeof(struct vmbus_channel_offer_channel)},
     | 1380 | +    { CHANNELMSG_RESCIND_CHANNELOFFER, 0, vmbus_onoffer_rescind,
     | 1381 | +        sizeof(struct vmbus_channel_rescind_offer) },
     | 1382 | +    { CHANNELMSG_REQUESTOFFERS, 0, NULL, 0},
     | 1383 | +    { CHANNELMSG_ALLOFFERS_DELIVERED, 1, vmbus_onoffers_delivered, 0},
     | 1384 | +    { CHANNELMSG_OPENCHANNEL, 0, NULL, 0},
     | 1385 | +    { CHANNELMSG_OPENCHANNEL_RESULT, 1, vmbus_onopen_result,
     | 1386 | +        sizeof(struct vmbus_channel_open_result)},
     | 1387 | +    { CHANNELMSG_CLOSECHANNEL, 0, NULL, 0},
     | 1388 | +    { CHANNELMSG_GPADL_HEADER, 0, NULL, 0},
     | 1389 | +    { CHANNELMSG_GPADL_BODY, 0, NULL, 0},
     | 1390 | +    { CHANNELMSG_GPADL_CREATED, 1, vmbus_ongpadl_created,
     | 1391 | +        sizeof(struct vmbus_channel_gpadl_created)},
     | 1392 | +    { CHANNELMSG_GPADL_TEARDOWN, 0, NULL, 0},
     | 1393 | +    { CHANNELMSG_GPADL_TORNDOWN, 1, vmbus_ongpadl_torndown,
     | 1394 | +        sizeof(struct vmbus_channel_gpadl_torndown) },
     | 1395 | +    { CHANNELMSG_RELID_RELEASED, 0, NULL, 0},
     | 1396 | +    { CHANNELMSG_INITIATE_CONTACT, 0, NULL, 0},
     | 1397 | +    { CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response,
     | 1398 | +        sizeof(struct vmbus_channel_version_response)},
     | 1399 | +    { CHANNELMSG_UNLOAD, 0, NULL, 0},
     | 1400 | +    { CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response, 0},
     | 1401 | +    { CHANNELMSG_18, 0, NULL, 0},
     | 1402 | +    { CHANNELMSG_19, 0, NULL, 0},
     | 1403 | +    { CHANNELMSG_20, 0, NULL, 0},
     | 1404 | +    { CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL, 0},
     | 1405 | +    { CHANNELMSG_MODIFYCHANNEL, 0, NULL, 0},
     | 1406 | +    { CHANNELMSG_TL_CONNECT_RESULT, 0, NULL, 0},
1277 | 1407 |  };
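
The new fourth column is the minimum payload size for each message type, which lets the message dispatcher reject truncated host messages before invoking a handler. A sketch of the check this enables (variable names illustrative; the real validation sits in the vmbus_on_msg_dpc() path):

    const struct vmbus_channel_message_table_entry *entry;

    if (hdr->msgtype >= CHANNELMSG_COUNT)
        return;
    entry = &channel_message_table[hdr->msgtype];
    if (msg->header.payload_size < entry->min_payload_len)
        return;     /* malformed: payload too short for this type */
    if (entry->message_handler)
        entry->message_handler(hdr);
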
---|
1278 | 1408 | 
1279 | 1409 |  /*
  .. |   .. |
1281 | 1411 |   *
1282 | 1412 |   * This is invoked in the vmbus worker thread context.
1283 | 1413 |   */
1284 |      | -void vmbus_onmessage(void *context)
     | 1414 | +void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
1285 | 1415 |  {
1286 |      | -    struct hv_message *msg = context;
1287 |      | -    struct vmbus_channel_message_header *hdr;
1288 |      | -
1289 |      | -    hdr = (struct vmbus_channel_message_header *)msg->u.payload;
1290 |      | -
1291 | 1416 |      trace_vmbus_on_message(hdr);
1292 | 1417 | 
1293 | 1418 |      /*
  .. |   .. |
1332 | 1457 | 
1333 | 1458 |      return ret;
1334 | 1459 |  }
1335 |      | -
1336 |      | -/*
1337 |      | - * Retrieve the (sub) channel on which to send an outgoing request.
1338 |      | - * When a primary channel has multiple sub-channels, we try to
1339 |      | - * distribute the load equally amongst all available channels.
1340 |      | - */
1341 |      | -struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
1342 |      | -{
1343 |      | -    struct list_head *cur, *tmp;
1344 |      | -    int cur_cpu;
1345 |      | -    struct vmbus_channel *cur_channel;
1346 |      | -    struct vmbus_channel *outgoing_channel = primary;
1347 |      | -    int next_channel;
1348 |      | -    int i = 1;
1349 |      | -
1350 |      | -    if (list_empty(&primary->sc_list))
1351 |      | -        return outgoing_channel;
1352 |      | -
1353 |      | -    next_channel = primary->next_oc++;
1354 |      | -
1355 |      | -    if (next_channel > (primary->num_sc)) {
1356 |      | -        primary->next_oc = 0;
1357 |      | -        return outgoing_channel;
1358 |      | -    }
1359 |      | -
1360 |      | -    cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id());
1361 |      | -    list_for_each_safe(cur, tmp, &primary->sc_list) {
1362 |      | -        cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
1363 |      | -        if (cur_channel->state != CHANNEL_OPENED_STATE)
1364 |      | -            continue;
1365 |      | -
1366 |      | -        if (cur_channel->target_vp == cur_cpu)
1367 |      | -            return cur_channel;
1368 |      | -
1369 |      | -        if (i == next_channel)
1370 |      | -            return cur_channel;
1371 |      | -
1372 |      | -        i++;
1373 |      | -    }
1374 |      | -
1375 |      | -    return outgoing_channel;
1376 |      | -}
1377 |      | -EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
1378 | 1460 | 
1379 | 1461 |  static void invoke_sc_cb(struct vmbus_channel *primary_channel)
1380 | 1462 |  {
---|