```diff
@@ ... @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2009, Microsoft Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
  *
  * Authors:
  *   Haiyang Zhang <haiyangz@microsoft.com>
@@ ... @@
 #include <linux/module.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
+#include <linux/cpu.h>
 #include <linux/hyperv.h>
 #include <asm/mshyperv.h>
 
 #include "hyperv_vmbus.h"
 
-static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
+static void init_vp_index(struct vmbus_channel *channel);
 
-static const struct vmbus_device vmbus_devs[] = {
+const struct vmbus_device vmbus_devs[] = {
 	/* IDE */
 	{ .dev_type = HV_IDE,
 	  HV_IDE_GUID,
@@ ... @@
 };
 
 static const struct {
-	uuid_le guid;
+	guid_t guid;
 } vmbus_unsupported_devs[] = {
 	{ HV_AVMA1_GUID },
 	{ HV_AVMA2_GUID },
@@ ... @@
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 }
 
-static bool is_unsupported_vmbus_devs(const uuid_le *guid)
+static bool is_unsupported_vmbus_devs(const guid_t *guid)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
-		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
 			return true;
 	return false;
 }
 
 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 {
-	const uuid_le *guid = &channel->offermsg.offer.if_type;
+	const guid_t *guid = &channel->offermsg.offer.if_type;
 	u16 i;
 
 	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
 		return HV_UNKNOWN;
 
 	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
-		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
+		if (guid_equal(guid, &vmbus_devs[i].guid))
 			return i;
 	}
 	pr_info("Unknown GUID: %pUl\n", guid);
```
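The `uuid_le` to `guid_t` conversion also replaces the tri-state `uuid_le_cmp()` (negated at every call site) with a boolean `guid_equal()`, which reads the way the call sites mean it. Below is a minimal userspace sketch of the same lookup pattern as `hv_get_dev_type()`; the type definition and byte values are stand-ins for illustration, not the kernel's real `guid_t`:

```c
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for the kernel's guid_t: 16 opaque bytes. */
typedef struct { unsigned char b[16]; } guid_t;

/* Equality by byte comparison, mirroring what guid_equal() means. */
static bool guid_equal(const guid_t *a, const guid_t *b)
{
	return memcmp(a, b, sizeof(guid_t)) == 0;
}

int main(void)
{
	/* Made-up leading bytes; the rest default to zero. */
	guid_t ide  = {{ 0x32, 0x26, 0x41, 0x32 }};
	guid_t scsi = {{ 0xd9, 0x63, 0x61, 0xba }};

	/* A lookup loop in the style of hv_get_dev_type(). */
	const guid_t table[] = { ide, scsi };
	guid_t offer = scsi;

	for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (guid_equal(&offer, &table[i]))
			printf("matched table entry %zu\n", i);
	return 0;
}
```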
```diff
@@ ... @@
 }
 
 /**
- * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
+ * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
  * @icmsghdrp: Pointer to msg header structure
- * @icmsg_negotiate: Pointer to negotiate message structure
  * @buf: Raw buffer channel data
+ * @fw_version: The framework versions we can support.
+ * @fw_vercnt: The size of @fw_version.
+ * @srv_version: The service versions we can support.
+ * @srv_vercnt: The size of @srv_version.
+ * @nego_fw_version: The selected framework version.
+ * @nego_srv_version: The selected service version.
  *
- * @icmsghdrp is of type &struct icmsg_hdr.
+ * Note: Versions are given in decreasing order.
+ *
  * Set up and fill in default negotiate response message.
- *
- * The fw_version and fw_vercnt specifies the framework version that
- * we can support.
- *
- * The srv_version and srv_vercnt specifies the service
- * versions we can support.
- *
- * Versions are given in decreasing order.
- *
- * nego_fw_version and nego_srv_version store the selected protocol versions.
- *
  * Mainly used by Hyper-V drivers.
  */
 bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
```
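The reworked kerneldoc states the contract tersely: the caller passes the framework and service versions it supports in decreasing order, and the function stores the negotiated picks. A hedged sketch of that selection logic follows; `host_supports()` is a hypothetical stand-in for parsing the host's negotiate payload, not part of the real API:

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical: pretend the host accepts anything up to version 3.0. */
static bool host_supports(unsigned int version)
{
	return version <= 0x00030000;
}

/*
 * Walk our list (decreasing order) and take the first version the host
 * also supports, i.e. the highest mutually supported one.
 */
static bool negotiate(const unsigned int *ours, int cnt, unsigned int *nego)
{
	for (int i = 0; i < cnt; i++) {
		if (host_supports(ours[i])) {
			*nego = ours[i];
			return true;
		}
	}
	return false;	/* no overlap: negotiation fails */
}

int main(void)
{
	/* Decreasing order, as the kerneldoc requires. */
	const unsigned int fw_versions[] = { 0x00040000, 0x00030000, 0x00010000 };
	unsigned int nego_fw;

	if (negotiate(fw_versions, 3, &nego_fw))
		printf("negotiated framework version 0x%x\n", nego_fw);
	return 0;
}
```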
```diff
@@ ... @@
 	if (!channel)
 		return NULL;
 
-	spin_lock_init(&channel->lock);
+	spin_lock_init(&channel->sched_lock);
 	init_completion(&channel->rescind_event);
 
 	INIT_LIST_HEAD(&channel->sc_list);
-	INIT_LIST_HEAD(&channel->percpu_list);
 
 	tasklet_init(&channel->callback_event,
 		     vmbus_on_event, (unsigned long)channel);
+
+	hv_ringbuffer_pre_init(channel);
 
 	return channel;
 }
@@ ... @@
 	kobject_put(&channel->kobj);
 }
 
-static void percpu_channel_enq(void *arg)
+void vmbus_channel_map_relid(struct vmbus_channel *channel)
 {
-	struct vmbus_channel *channel = arg;
-	struct hv_per_cpu_context *hv_cpu
-		= this_cpu_ptr(hv_context.cpu_context);
-
-	list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
+	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
+		return;
+	/*
+	 * The mapping of the channel's relid is visible from the CPUs that
+	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
+	 * execute:
+	 *
+	 * (a) In the "normal (i.e., not resuming from hibernation)" path,
+	 *     the full barrier in virt_store_mb() guarantees that the store
+	 *     is propagated to all CPUs before the add_channel_work work
+	 *     is queued.  In turn, add_channel_work is queued before the
+	 *     channel's ring buffer is allocated/initialized and the
+	 *     OPENCHANNEL message for the channel is sent in vmbus_open().
+	 *     Hyper-V won't start sending the interrupts for the channel
+	 *     before the OPENCHANNEL message is acked.  The memory barrier
+	 *     in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
+	 *     that vmbus_chan_sched() must find the channel's relid in
+	 *     recv_int_page before retrieving the channel pointer from the
+	 *     array of channels.
+	 *
+	 * (b) In the "resuming from hibernation" path, the virt_store_mb()
+	 *     guarantees that the store is propagated to all CPUs before
+	 *     the VMBus connection is marked as ready for the resume event
+	 *     (cf. check_ready_for_resume_event()).  The interrupt handler
+	 *     of the VMBus driver and vmbus_chan_sched() can not run before
+	 *     vmbus_bus_resume() has completed execution (cf. resume_noirq).
+	 */
+	virt_store_mb(
+		vmbus_connection.channels[channel->offermsg.child_relid],
+		channel);
 }
 
-static void percpu_channel_deq(void *arg)
+void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
 {
-	struct vmbus_channel *channel = arg;
-
-	list_del_rcu(&channel->percpu_list);
+	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
+		return;
+	WRITE_ONCE(
+		vmbus_connection.channels[channel->offermsg.child_relid],
+		NULL);
 }
-
 
 static void vmbus_release_relid(u32 relid)
 {
```
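The long comment is the heart of this hunk: the relid-to-channel mapping must be globally visible before anything that could cause `vmbus_chan_sched()` to consult it. A compressed userspace analogue using C11 atomics, with a sequentially consistent store playing the role of `virt_store_mb()`; the array size, relid values, and helper names are illustrative only:

```c
#include <stdatomic.h>
#include <stdio.h>

#define MAX_RELIDS 8

struct channel { int relid; };

/* relid -> channel map; NULL means "no channel bound". */
static _Atomic(struct channel *) channels[MAX_RELIDS];

/*
 * Publish with seq_cst ordering: the store is visible to all threads
 * before any later action (queueing work, sending OPENCHANNEL) can
 * trigger a lookup on another CPU.
 */
static void map_relid(struct channel *ch)
{
	if (ch->relid >= MAX_RELIDS)
		return;
	atomic_store(&channels[ch->relid], ch);
}

/* The interrupt-side lookup, as vmbus_chan_sched() would do. */
static struct channel *relid2channel(int relid)
{
	return atomic_load(&channels[relid]);
}

int main(void)
{
	struct channel ch = { .relid = 3 };

	map_relid(&ch);
	printf("lookup(3) -> relid %d\n", relid2channel(3)->relid);

	/* Unmap, like WRITE_ONCE(..., NULL) in the patch. */
	atomic_store(&channels[ch.relid], NULL);
	return 0;
}
```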
```diff
@@ ... @@
 	trace_vmbus_release_relid(&msg, ret);
 }
 
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
 {
-	unsigned long flags;
-	struct vmbus_channel *primary_channel, *channel;
-
-	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
-	/*
-	 * Make sure channel is valid as we may have raced.
-	 */
-	channel = relid2channel(relid);
-	if (!channel)
-		return;
-
+	lockdep_assert_held(&vmbus_connection.channel_mutex);
 	BUG_ON(!channel->rescind);
-	if (channel->target_cpu != get_cpu()) {
-		put_cpu();
-		smp_call_function_single(channel->target_cpu,
-					 percpu_channel_deq, channel, true);
-	} else {
-		percpu_channel_deq(channel);
-		put_cpu();
-	}
-
-	if (channel->primary_channel == NULL) {
-		list_del(&channel->listentry);
-
-		primary_channel = channel;
-	} else {
-		primary_channel = channel->primary_channel;
-		spin_lock_irqsave(&primary_channel->lock, flags);
-		list_del(&channel->sc_list);
-		primary_channel->num_sc--;
-		spin_unlock_irqrestore(&primary_channel->lock, flags);
-	}
 
 	/*
-	 * We need to free the bit for init_vp_index() to work in the case
-	 * of sub-channel, when we reload drivers like hv_netvsc.
+	 * hv_process_channel_removal() could find INVALID_RELID only for
+	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
 	 */
-	if (channel->affinity_policy == HV_LOCALIZED)
-		cpumask_clear_cpu(channel->target_cpu,
-				  &primary_channel->alloced_cpus_in_node);
+	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
+		!is_hvsock_channel(channel));
 
-	vmbus_release_relid(relid);
+	/*
+	 * Upon suspend, an in-use hv_sock channel is removed from the array of
+	 * channels and the relid is invalidated.  After hibernation, when the
+	 * user-space application destroys the channel, it's unnecessary and
+	 * unsafe to remove the channel from the array of channels.  See also
+	 * the inline comments before the call of vmbus_release_relid() below.
+	 */
+	if (channel->offermsg.child_relid != INVALID_RELID)
+		vmbus_channel_unmap_relid(channel);
+
+	if (channel->primary_channel == NULL)
+		list_del(&channel->listentry);
+	else
+		list_del(&channel->sc_list);
+
+	/*
+	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
+	 * init_vp_index() can (re-)use the CPU.
+	 */
+	if (hv_is_perf_channel(channel))
+		hv_clear_alloced_cpu(channel->target_cpu);
+
+	/*
+	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+	 * the relid is invalidated; after hibernation, when the user-space app
+	 * destroys the channel, the relid is INVALID_RELID, and in this case
+	 * it's unnecessary and unsafe to release the old relid, since the same
+	 * relid can refer to a completely different channel now.
+	 */
+	if (channel->offermsg.child_relid != INVALID_RELID)
+		vmbus_release_relid(channel->offermsg.child_relid);
 
 	free_channel(channel);
 }
```
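Note the new bookkeeping at the end of the removal path: a "perf" channel's CPU is cleared from `hv_numa_map[]` so that `init_vp_index()` can hand that CPU out again. A toy single-word version of that mask bookkeeping; the names stand in for the kernel's cpumask API:

```c
#include <stdint.h>
#include <stdio.h>

/* One word of "allocated CPUs", standing in for hv_numa_map[node]. */
static uint64_t alloced_cpus;

/* What init_vp_index() does when it binds a channel to a CPU. */
static void set_alloced_cpu(unsigned int cpu)
{
	alloced_cpus |= UINT64_C(1) << cpu;
}

/* What hv_clear_alloced_cpu() does on channel removal. */
static void clear_alloced_cpu(unsigned int cpu)
{
	alloced_cpus &= ~(UINT64_C(1) << cpu);
}

int main(void)
{
	set_alloced_cpu(2);	/* channel bound to CPU 2 */
	clear_alloced_cpu(2);	/* channel rescinded: CPU 2 reusable */
	printf("alloced mask now 0x%llx\n",
	       (unsigned long long)alloced_cpus);
	return 0;
}
```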
```diff
@@ ... @@
 	struct vmbus_channel *newchannel =
 		container_of(work, struct vmbus_channel, add_channel_work);
 	struct vmbus_channel *primary_channel = newchannel->primary_channel;
-	unsigned long flags;
-	u16 dev_type;
 	int ret;
-
-	dev_type = hv_get_dev_type(newchannel);
-
-	init_vp_index(newchannel, dev_type);
-
-	if (newchannel->target_cpu != get_cpu()) {
-		put_cpu();
-		smp_call_function_single(newchannel->target_cpu,
-					 percpu_channel_enq,
-					 newchannel, true);
-	} else {
-		percpu_channel_enq(newchannel);
-		put_cpu();
-	}
 
 	/*
 	 * This state is used to indicate a successful open
@@ ... @@
 	if (!newchannel->device_obj)
 		goto err_deq_chan;
 
-	newchannel->device_obj->device_id = dev_type;
+	newchannel->device_obj->device_id = newchannel->device_id;
 	/*
 	 * Add the new device to the bus. This will kick off device-driver
 	 * binding which eventually invokes the device driver's AddDevice()
 	 * method.
+	 *
+	 * If vmbus_device_register() fails, the 'device_obj' is freed in
+	 * vmbus_device_release() as called by device_unregister() in the
+	 * error path of vmbus_device_register().  In the outside error
+	 * path, there's no need to free it.
 	 */
 	ret = vmbus_device_register(newchannel->device_obj);
 
 	if (ret != 0) {
 		pr_err("unable to add child device object (relid %d)\n",
 			newchannel->offermsg.child_relid);
-		kfree(newchannel->device_obj);
 		goto err_deq_chan;
 	}
 
@@ ... @@
 	 */
 	newchannel->probe_done = true;
 
-	if (primary_channel == NULL) {
+	if (primary_channel == NULL)
 		list_del(&newchannel->listentry);
-	} else {
-		spin_lock_irqsave(&primary_channel->lock, flags);
+	else
 		list_del(&newchannel->sc_list);
-		spin_unlock_irqrestore(&primary_channel->lock, flags);
-	}
+
+	/* vmbus_process_offer() has mapped the channel. */
+	vmbus_channel_unmap_relid(newchannel);
 
 	mutex_unlock(&vmbus_connection.channel_mutex);
-
-	if (newchannel->target_cpu != get_cpu()) {
-		put_cpu();
-		smp_call_function_single(newchannel->target_cpu,
-					 percpu_channel_deq,
-					 newchannel, true);
-	} else {
-		percpu_channel_deq(newchannel);
-		put_cpu();
-	}
 
 	vmbus_release_relid(newchannel->offermsg.child_relid);
 
@@ ... @@
 {
 	struct vmbus_channel *channel;
 	struct workqueue_struct *wq;
-	unsigned long flags;
 	bool fnew = true;
 
+	/*
+	 * Synchronize vmbus_process_offer() and CPU hotplugging:
+	 *
+	 * CPU1				CPU2
+	 *
+	 * [vmbus_process_offer()]	[Hot removal of the CPU]
+	 *
+	 * CPUS_READ_LOCK		CPUS_WRITE_LOCK
+	 * LOAD cpu_online_mask		SEARCH chn_list
+	 * STORE target_cpu		LOAD target_cpu
+	 * INSERT chn_list		STORE cpu_online_mask
+	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
+	 *
+	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
+	 *	    CPU2's SEARCH from *not* seeing CPU1's INSERT
+	 *
+	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
+	 *	    CPU2's LOAD from *not* seeing CPU1's STORE
+	 */
+	cpus_read_lock();
+
+	/*
+	 * Serializes the modifications of the chn_list list as well as
+	 * the accesses to next_numa_node_id in init_vp_index().
+	 */
 	mutex_lock(&vmbus_connection.channel_mutex);
+
+	init_vp_index(newchannel);
+
+	/* Remember the channels that should be cleaned up upon suspend. */
+	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
 
 	/*
 	 * Now that we have acquired the channel_mutex,
@@ ... @@
 	atomic_dec(&vmbus_connection.offer_in_progress);
 
 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
-		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
-				 newchannel->offermsg.offer.if_type) &&
-		    !uuid_le_cmp(channel->offermsg.offer.if_instance,
-				 newchannel->offermsg.offer.if_instance)) {
+		if (guid_equal(&channel->offermsg.offer.if_type,
+			       &newchannel->offermsg.offer.if_type) &&
+		    guid_equal(&channel->offermsg.offer.if_instance,
+			       &newchannel->offermsg.offer.if_instance)) {
 			fnew = false;
 			break;
 		}
 	}
 
-	if (fnew)
+	if (fnew) {
 		list_add_tail(&newchannel->listentry,
 			      &vmbus_connection.chn_list);
-	else {
+	} else {
 		/*
 		 * Check to see if this is a valid sub-channel.
 		 */
 		if (newchannel->offermsg.offer.sub_channel_index == 0) {
 			mutex_unlock(&vmbus_connection.channel_mutex);
+			cpus_read_unlock();
 			/*
 			 * Don't call free_channel(), because newchannel->kobj
 			 * is not initialized yet.
@@ ... @@
 		 * Process the sub-channel.
 		 */
 		newchannel->primary_channel = channel;
-		spin_lock_irqsave(&channel->lock, flags);
 		list_add_tail(&newchannel->sc_list, &channel->sc_list);
-		spin_unlock_irqrestore(&channel->lock, flags);
 	}
 
+	vmbus_channel_map_relid(newchannel);
+
 	mutex_unlock(&vmbus_connection.channel_mutex);
+	cpus_read_unlock();
 
 	/*
 	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
```
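The lock-ordering diagram pairs `cpus_read_lock()` with `channel_mutex` so that a channel can never be bound to a CPU that hot removal has already torn down, and hot removal can never miss a channel already inserted into `chn_list`. A userspace analogue under stated assumptions — a pthread rwlock plays the hotplug lock, a plain mutex plays `channel_mutex`, one flag plays `cpu_online_mask`, and a single slot plays `chn_list`:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t chan_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool cpu_online = true;
static int bound_channel_cpu = -1;	/* -1: no channel in the list */

/* vmbus_process_offer()-like path: pick an online CPU, insert channel. */
static void process_offer(void)
{
	pthread_rwlock_rdlock(&hotplug_lock);
	pthread_mutex_lock(&chan_mutex);
	if (cpu_online)
		bound_channel_cpu = 0;	/* STORE target_cpu + INSERT chn_list */
	pthread_mutex_unlock(&chan_mutex);
	pthread_rwlock_unlock(&hotplug_lock);
}

/* Hot-removal path: with the write lock held, no offer is mid-flight. */
static void offline_cpu(void)
{
	pthread_rwlock_wrlock(&hotplug_lock);
	pthread_mutex_lock(&chan_mutex);
	if (bound_channel_cpu == 0)	/* SEARCH chn_list, LOAD target_cpu */
		printf("migrate channel away from CPU 0 first\n");
	cpu_online = false;		/* STORE cpu_online_mask */
	pthread_mutex_unlock(&chan_mutex);
	pthread_rwlock_unlock(&hotplug_lock);
}

int main(void)
{
	process_offer();
	offline_cpu();
	return 0;
}
```

The write lock excludes all readers, which is exactly the "Forbids" pairs in the comment: either the offer path sees the CPU offline, or the removal path sees the inserted channel; never neither.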
```diff
@@ ... @@
  * We use this state to statically distribute the channel interrupt load.
  */
 static int next_numa_node_id;
-/*
- * init_vp_index() accesses global variables like next_numa_node_id, and
- * it can run concurrently for primary channels and sub-channels: see
- * vmbus_process_offer(), so we need the lock to protect the global
- * variables.
- */
-static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
 
 /*
  * Starting with Win8, we can statically distribute the incoming
  * channel interrupt load by binding a channel to VCPU.
- * We distribute the interrupt loads to one or more NUMA nodes based on
- * the channel's affinity_policy.
  *
  * For pre-win8 hosts or non-performance critical channels we assign the
- * first CPU in the first NUMA node.
+ * VMBUS_CONNECT_CPU.
+ *
+ * Starting with win8, performance critical channels will be distributed
+ * evenly among all the available NUMA nodes.  Once the node is assigned,
+ * we will assign the CPU based on a simple round robin scheme.
  */
-static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
+static void init_vp_index(struct vmbus_channel *channel)
 {
-	u32 cur_cpu;
-	bool perf_chn = vmbus_devs[dev_type].perf_device;
-	struct vmbus_channel *primary = channel->primary_channel;
-	int next_node;
+	bool perf_chn = hv_is_perf_channel(channel);
 	cpumask_var_t available_mask;
 	struct cpumask *alloced_mask;
+	u32 target_cpu;
+	int numa_node;
 
 	if ((vmbus_proto_version == VERSION_WS2008) ||
 	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn) ||
 	    !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
 		/*
 		 * Prior to win8, all channel interrupts are
-		 * delivered on cpu 0.
+		 * delivered on VMBUS_CONNECT_CPU.
 		 * Also if the channel is not a performance critical
-		 * channel, bind it to cpu 0.
-		 * In case alloc_cpumask_var() fails, bind it to cpu 0.
+		 * channel, bind it to VMBUS_CONNECT_CPU.
+		 * In case alloc_cpumask_var() fails, bind it to
+		 * VMBUS_CONNECT_CPU.
 		 */
-		channel->numa_node = 0;
-		channel->target_cpu = 0;
-		channel->target_vp = hv_cpu_number_to_vp_number(0);
+		channel->target_cpu = VMBUS_CONNECT_CPU;
+		if (perf_chn)
+			hv_set_alloced_cpu(VMBUS_CONNECT_CPU);
 		return;
 	}
 
-	spin_lock(&bind_channel_to_cpu_lock);
-
-	/*
-	 * Based on the channel affinity policy, we will assign the NUMA
-	 * nodes.
-	 */
-
-	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
-		while (true) {
-			next_node = next_numa_node_id++;
-			if (next_node == nr_node_ids) {
-				next_node = next_numa_node_id = 0;
-				continue;
-			}
-			if (cpumask_empty(cpumask_of_node(next_node)))
-				continue;
-			break;
+	while (true) {
+		numa_node = next_numa_node_id++;
+		if (numa_node == nr_node_ids) {
+			next_numa_node_id = 0;
+			continue;
 		}
-		channel->numa_node = next_node;
-		primary = channel;
+		if (cpumask_empty(cpumask_of_node(numa_node)))
+			continue;
+		break;
 	}
-	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];
+	alloced_mask = &hv_context.hv_numa_map[numa_node];
 
 	if (cpumask_weight(alloced_mask) ==
-	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
+	    cpumask_weight(cpumask_of_node(numa_node))) {
 		/*
 		 * We have cycled through all the CPUs in the node;
 		 * reset the alloced map.
@@ ... @@
 		cpumask_clear(alloced_mask);
 	}
 
-	cpumask_xor(available_mask, alloced_mask,
-		    cpumask_of_node(primary->numa_node));
+	cpumask_xor(available_mask, alloced_mask, cpumask_of_node(numa_node));
 
-	cur_cpu = -1;
+	target_cpu = cpumask_first(available_mask);
+	cpumask_set_cpu(target_cpu, alloced_mask);
 
-	if (primary->affinity_policy == HV_LOCALIZED) {
-		/*
-		 * Normally Hyper-V host doesn't create more subchannels
-		 * than there are VCPUs on the node but it is possible when not
-		 * all present VCPUs on the node are initialized by guest.
-		 * Clear the alloced_cpus_in_node to start over.
-		 */
-		if (cpumask_equal(&primary->alloced_cpus_in_node,
-				  cpumask_of_node(primary->numa_node)))
-			cpumask_clear(&primary->alloced_cpus_in_node);
-	}
-
-	while (true) {
-		cur_cpu = cpumask_next(cur_cpu, available_mask);
-		if (cur_cpu >= nr_cpu_ids) {
-			cur_cpu = -1;
-			cpumask_copy(available_mask,
-				     cpumask_of_node(primary->numa_node));
-			continue;
-		}
-
-		if (primary->affinity_policy == HV_LOCALIZED) {
-			/*
-			 * NOTE: in the case of sub-channel, we clear the
-			 * sub-channel related bit(s) in
-			 * primary->alloced_cpus_in_node in
-			 * hv_process_channel_removal(), so when we
-			 * reload drivers like hv_netvsc in SMP guest, here
-			 * we're able to re-allocate
-			 * bit from primary->alloced_cpus_in_node.
-			 */
-			if (!cpumask_test_cpu(cur_cpu,
-					      &primary->alloced_cpus_in_node)) {
-				cpumask_set_cpu(cur_cpu,
-						&primary->alloced_cpus_in_node);
-				cpumask_set_cpu(cur_cpu, alloced_mask);
-				break;
-			}
-		} else {
-			cpumask_set_cpu(cur_cpu, alloced_mask);
-			break;
-		}
-	}
-
-	channel->target_cpu = cur_cpu;
-	channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
-
-	spin_unlock(&bind_channel_to_cpu_lock);
+	channel->target_cpu = target_cpu;
 
 	free_cpumask_var(available_mask);
 }
```
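With `affinity_policy` gone, `init_vp_index()` reduces to: round-robin across NUMA nodes, then take the first CPU of that node not yet handed out, resetting the per-node mask once it fills. A self-contained model of that simplified algorithm; the node and CPU counts are made up for the sketch:

```c
#include <stdint.h>
#include <stdio.h>

#define NR_NODES 2
#define CPUS_PER_NODE 4

/* Per-node mask of CPUs already handed to a channel (hv_numa_map[]). */
static uint32_t alloced[NR_NODES];
static int next_node;

static int assign_target_cpu(void)
{
	uint32_t node_mask = (1u << CPUS_PER_NODE) - 1;
	int node, cpu;

	/* Round-robin over NUMA nodes, as init_vp_index() does. */
	node = next_node++ % NR_NODES;

	/* All CPUs in the node used? Start the node over. */
	if (alloced[node] == node_mask)
		alloced[node] = 0;

	/* First not-yet-allocated CPU of the node (the cpumask_xor +
	 * cpumask_first pair in the patch). */
	for (cpu = 0; cpu < CPUS_PER_NODE; cpu++) {
		if (!(alloced[node] & (1u << cpu))) {
			alloced[node] |= 1u << cpu;
			return node * CPUS_PER_NODE + cpu;
		}
	}
	return 0;	/* unreachable given the reset above */
}

int main(void)
{
	for (int i = 0; i < 6; i++)
		printf("channel %d -> CPU %d\n", i, assign_target_cpu());
	return 0;
}
```

Dropping `bind_channel_to_cpu_lock` works because, after this patch, `channel_mutex` already serializes every caller of `init_vp_index()` (see the comment added in `vmbus_process_offer()`).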
```diff
@@ ... @@
 	vmbus_wait_for_unload();
 }
 
+static void check_ready_for_resume_event(void)
+{
+	/*
+	 * If all the old primary channels have been fixed up, then it's safe
+	 * to resume.
+	 */
+	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+		complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+				      struct vmbus_channel_offer_channel *offer)
+{
+	/*
+	 * Setup state for signalling the host.
+	 */
+	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+	if (vmbus_proto_version != VERSION_WS2008) {
+		channel->is_dedicated_interrupt =
+				(offer->is_dedicated_interrupt != 0);
+		channel->sig_event = offer->connection_id;
+	}
+
+	memcpy(&channel->offermsg, offer,
+	       sizeof(struct vmbus_channel_offer_channel));
+	channel->monitor_grp = (u8)offer->monitorid / 32;
+	channel->monitor_bit = (u8)offer->monitorid % 32;
+	channel->device_id = hv_get_dev_type(channel);
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+	struct vmbus_channel *channel = NULL, *iter;
+	const guid_t *inst1, *inst2;
+
+	/* Ignore sub-channel offers. */
+	if (offer->offer.sub_channel_index != 0)
+		return NULL;
+
+	mutex_lock(&vmbus_connection.channel_mutex);
+
+	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+		inst1 = &iter->offermsg.offer.if_instance;
+		inst2 = &offer->offer.if_instance;
+
+		if (guid_equal(inst1, inst2)) {
+			channel = iter;
+			break;
+		}
+	}
+
+	mutex_unlock(&vmbus_connection.channel_mutex);
+
+	return channel;
+}
+
 /*
  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  *
@@ ... @@
 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 {
 	struct vmbus_channel_offer_channel *offer;
-	struct vmbus_channel *newchannel;
+	struct vmbus_channel *oldchannel, *newchannel;
+	size_t offer_sz;
 
 	offer = (struct vmbus_channel_offer_channel *)hdr;
 
 	trace_vmbus_onoffer(offer);
+
+	oldchannel = find_primary_channel_by_offer(offer);
+
+	if (oldchannel != NULL) {
+		/*
+		 * We're resuming from hibernation: all the sub-channel and
+		 * hv_sock channels we had before the hibernation should have
+		 * been cleaned up, and now we must be seeing a re-offered
+		 * primary channel that we had before the hibernation.
+		 */
+
+		/*
+		 * { Initially: channel relid = INVALID_RELID,
+		 *		channels[valid_relid] = NULL }
+		 *
+		 * CPU1					CPU2
+		 *
+		 * [vmbus_onoffer()]			[vmbus_device_release()]
+		 *
+		 * LOCK channel_mutex			LOCK channel_mutex
+		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
+		 * MAP_RELID channel			if (r1 != INVALID_RELID)
+		 * UNLOCK channel_mutex			  UNMAP_RELID channel
+		 *					UNLOCK channel_mutex
+		 *
+		 * Forbids: r1 == valid_relid &&
+		 *		channels[valid_relid] == channel
+		 *
+		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
+		 * None of the hv_sock channels which were present before the
+		 * suspend are re-offered upon the resume.  See the WARN_ON()
+		 * in hv_process_channel_removal().
+		 */
+		mutex_lock(&vmbus_connection.channel_mutex);
+
+		atomic_dec(&vmbus_connection.offer_in_progress);
+
+		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+		/* Fix up the relid. */
+		oldchannel->offermsg.child_relid = offer->child_relid;
+
+		offer_sz = sizeof(*offer);
+		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
+			/*
+			 * This is not an error, since the host can also change
+			 * the other field(s) of the offer, e.g. on WS RS5
+			 * (Build 17763), the offer->connection_id of the
+			 * Mellanox VF vmbus device can change when the host
+			 * reoffers the device upon resume.
+			 */
+			pr_debug("vmbus offer changed: relid=%d\n",
+				 offer->child_relid);
+
+			print_hex_dump_debug("Old vmbus offer: ",
+					     DUMP_PREFIX_OFFSET, 16, 4,
+					     &oldchannel->offermsg, offer_sz,
+					     false);
+			print_hex_dump_debug("New vmbus offer: ",
+					     DUMP_PREFIX_OFFSET, 16, 4,
+					     offer, offer_sz, false);
+
+			/* Fix up the old channel. */
+			vmbus_setup_channel_state(oldchannel, offer);
+		}
+
+		/* Add the channel back to the array of channels. */
+		vmbus_channel_map_relid(oldchannel);
+		check_ready_for_resume_event();
+
+		mutex_unlock(&vmbus_connection.channel_mutex);
+		return;
+	}
 
 	/* Allocate the channel object and save this offer. */
 	newchannel = alloc_channel();
```
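In the resume path, the relid is patched into the saved offer first, precisely so that a plain `memcmp()` against the re-offer only flags *other* changed fields — which, per the comment, is legitimate host behavior rather than an error. A sketch of that order of operations, using a trimmed-down, hypothetical offer struct:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* A trimmed-down offer: just the fields this sketch needs. */
struct offer {
	uint32_t child_relid;
	uint32_t connection_id;
};

#define INVALID_RELID UINT32_MAX

int main(void)
{
	struct offer saved = { INVALID_RELID, 7 };	/* kept across hibernation */
	struct offer reoffer = { 42, 9 };		/* host re-offer on resume */

	/* Fix up the relid first so it cannot trip the comparison below. */
	saved.child_relid = reoffer.child_relid;

	/* Any remaining difference means other fields changed. */
	if (memcmp(&saved, &reoffer, sizeof(saved)) != 0)
		printf("offer changed beyond the relid; re-running setup\n");
	return 0;
}
```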
```diff
@@ ... @@
 		return;
 	}
 
-	/*
-	 * Setup state for signalling the host.
-	 */
-	newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
-	if (vmbus_proto_version != VERSION_WS2008) {
-		newchannel->is_dedicated_interrupt =
-				(offer->is_dedicated_interrupt != 0);
-		newchannel->sig_event = offer->connection_id;
-	}
-
-	memcpy(&newchannel->offermsg, offer,
-	       sizeof(struct vmbus_channel_offer_channel));
-	newchannel->monitor_grp = (u8)offer->monitorid / 32;
-	newchannel->monitor_bit = (u8)offer->monitorid % 32;
+	vmbus_setup_channel_state(newchannel, offer);
 
 	vmbus_process_offer(newchannel);
+}
+
+static void check_ready_for_suspend_event(void)
+{
+	/*
+	 * If all the sub-channels or hv_sock channels have been cleaned up,
+	 * then it's safe to suspend.
+	 */
+	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+		complete(&vmbus_connection.ready_for_suspend_event);
 }
 
 /*
@@ ... @@
 	struct vmbus_channel_rescind_offer *rescind;
 	struct vmbus_channel *channel;
 	struct device *dev;
+	bool clean_up_chan_for_suspend;
 
 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
@@ ... @@
 	 * offer comes in first and then the rescind.
 	 * Since we process these events in work elements,
 	 * and with preemption, we may end up processing
-	 * the events out of order. Given that we handle these
-	 * work elements on the same CPU, this is possible only
-	 * in the case of preemption. In any case wait here
-	 * until the offer processing has moved beyond the
-	 * point where the channel is discoverable.
+	 * the events out of order.  We rely on the synchronization
+	 * provided by offer_in_progress and by channel_mutex for
+	 * ordering these events:
+	 *
+	 * { Initially: offer_in_progress = 1 }
+	 *
+	 * CPU1				CPU2
+	 *
+	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
+	 *
+	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
+	 * DECREMENT offer_in_progress	LOCK channel_mutex
+	 * STORE channels[]		LOAD channels[]
+	 * UNLOCK channel_mutex		UNLOCK channel_mutex
+	 *
+	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
 	 */
 
 	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
@@ ... @@
 		return;
 	}
 
+	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+				    is_sub_channel(channel);
 	/*
 	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
 	 * should make sure the channel callback is not running any more.
@@ ... @@
 	if (channel->device_obj) {
 		if (channel->chn_rescind_callback) {
 			channel->chn_rescind_callback(channel);
+
+			if (clean_up_chan_for_suspend)
+				check_ready_for_suspend_event();
+
 			return;
 		}
 		/*
@@ ... @@
 			 * The channel is currently not open;
 			 * it is safe for us to cleanup the channel.
 			 */
-			hv_process_channel_removal(rescind->child_relid);
+			hv_process_channel_removal(channel);
 		} else {
 			complete(&channel->rescind_event);
 		}
 		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
+
+	/* The "channel" may have been freed.  Do not access it any longer. */
+
+	if (clean_up_chan_for_suspend)
+		check_ready_for_suspend_event();
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
@@ ... @@
 /* Channel message dispatch table */
 const struct vmbus_channel_message_table_entry
 channel_message_table[CHANNELMSG_COUNT] = {
-	{ CHANNELMSG_INVALID,			0, NULL },
-	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer },
-	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind },
-	{ CHANNELMSG_REQUESTOFFERS,		0, NULL },
-	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered },
-	{ CHANNELMSG_OPENCHANNEL,		0, NULL },
-	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result },
-	{ CHANNELMSG_CLOSECHANNEL,		0, NULL },
-	{ CHANNELMSG_GPADL_HEADER,		0, NULL },
-	{ CHANNELMSG_GPADL_BODY,		0, NULL },
-	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created },
-	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL },
-	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown },
-	{ CHANNELMSG_RELID_RELEASED,		0, NULL },
-	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL },
-	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response },
-	{ CHANNELMSG_UNLOAD,			0, NULL },
-	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response },
-	{ CHANNELMSG_18,			0, NULL },
-	{ CHANNELMSG_19,			0, NULL },
-	{ CHANNELMSG_20,			0, NULL },
-	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL },
-	{ CHANNELMSG_22,			0, NULL },
-	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL },
+	{ CHANNELMSG_INVALID,			0, NULL, 0},
+	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
+		sizeof(struct vmbus_channel_offer_channel)},
+	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
+		sizeof(struct vmbus_channel_rescind_offer) },
+	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
+	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
+	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
+	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
+		sizeof(struct vmbus_channel_open_result)},
+	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
+	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
+	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
+	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
+		sizeof(struct vmbus_channel_gpadl_created)},
+	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
+	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
+		sizeof(struct vmbus_channel_gpadl_torndown) },
+	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
+	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
+	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
+		sizeof(struct vmbus_channel_version_response)},
+	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
+	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
+	{ CHANNELMSG_18,			0, NULL, 0},
+	{ CHANNELMSG_19,			0, NULL, 0},
+	{ CHANNELMSG_20,			0, NULL, 0},
+	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
+	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
+	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
 };
 
```
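Each dispatch-table entry gains a fourth field carrying the `sizeof()` of the expected payload (or 0 where there is no handler or no fixed payload), which suggests the message dispatcher can reject truncated messages before invoking a handler. A hedged sketch of that table-plus-length-check pattern; the message types and payload layout below are invented for illustration:

```c
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum msg_type { MSG_INVALID, MSG_OFFER, MSG_COUNT };

struct offer_payload { uint32_t child_relid; uint32_t monitorid; };

struct table_entry {
	enum msg_type type;
	void (*handler)(const void *payload);
	size_t min_payload_len;		/* the field this patch adds */
};

static void on_offer(const void *payload)
{
	const struct offer_payload *o = payload;
	printf("offer for relid %u\n", o->child_relid);
}

static const struct table_entry table[MSG_COUNT] = {
	[MSG_INVALID] = { MSG_INVALID, NULL, 0 },
	[MSG_OFFER]   = { MSG_OFFER, on_offer, sizeof(struct offer_payload) },
};

/* Dispatch only if the message is long enough for its declared type. */
static void dispatch(enum msg_type type, const void *payload, size_t len)
{
	if (type >= MSG_COUNT || !table[type].handler)
		return;
	if (len < table[type].min_payload_len) {
		fprintf(stderr, "truncated message, dropping\n");
		return;
	}
	table[type].handler(payload);
}

int main(void)
{
	struct offer_payload p = { 42, 5 };

	dispatch(MSG_OFFER, &p, sizeof(p));	/* handled */
	dispatch(MSG_OFFER, &p, 4);		/* rejected as truncated */
	return 0;
}
```

Keeping the minimum length next to the handler keeps validation data-driven: adding a message type is one table row, not another special case in the dispatcher.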
| 1279 | 1395 | /* |
|---|
| .. | .. |
|---|
| 1281 | 1397 | * |
|---|
| 1282 | 1398 | * This is invoked in the vmbus worker thread context. |
|---|
| 1283 | 1399 | */ |
|---|
| 1284 | | -void vmbus_onmessage(void *context) |
|---|
| 1400 | +void vmbus_onmessage(struct vmbus_channel_message_header *hdr) |
|---|
| 1285 | 1401 | { |
|---|
| 1286 | | - struct hv_message *msg = context; |
|---|
| 1287 | | - struct vmbus_channel_message_header *hdr; |
|---|
| 1288 | | - |
|---|
| 1289 | | - hdr = (struct vmbus_channel_message_header *)msg->u.payload; |
|---|
| 1290 | | - |
|---|
| 1291 | 1402 | trace_vmbus_on_message(hdr); |
|---|
| 1292 | 1403 | |
|---|
| 1293 | 1404 | /* |
|---|
| .. | .. |
|---|
| 1332 | 1443 | |
|---|
| 1333 | 1444 | return ret; |
|---|
| 1334 | 1445 | } |
|---|
| 1335 | | - |
|---|
| 1336 | | -/* |
|---|
| 1337 | | - * Retrieve the (sub) channel on which to send an outgoing request. |
|---|
| 1338 | | - * When a primary channel has multiple sub-channels, we try to |
|---|
| 1339 | | - * distribute the load equally amongst all available channels. |
|---|
| 1340 | | - */ |
|---|
| 1341 | | -struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) |
|---|
| 1342 | | -{ |
|---|
| 1343 | | - struct list_head *cur, *tmp; |
|---|
| 1344 | | - int cur_cpu; |
|---|
| 1345 | | - struct vmbus_channel *cur_channel; |
|---|
| 1346 | | - struct vmbus_channel *outgoing_channel = primary; |
|---|
| 1347 | | - int next_channel; |
|---|
| 1348 | | - int i = 1; |
|---|
| 1349 | | - |
|---|
| 1350 | | - if (list_empty(&primary->sc_list)) |
|---|
| 1351 | | - return outgoing_channel; |
|---|
| 1352 | | - |
|---|
| 1353 | | - next_channel = primary->next_oc++; |
|---|
| 1354 | | - |
|---|
| 1355 | | - if (next_channel > (primary->num_sc)) { |
|---|
| 1356 | | - primary->next_oc = 0; |
|---|
| 1357 | | - return outgoing_channel; |
|---|
| 1358 | | - } |
|---|
| 1359 | | - |
|---|
| 1360 | | - cur_cpu = hv_cpu_number_to_vp_number(smp_processor_id()); |
|---|
| 1361 | | - list_for_each_safe(cur, tmp, &primary->sc_list) { |
|---|
| 1362 | | - cur_channel = list_entry(cur, struct vmbus_channel, sc_list); |
|---|
| 1363 | | - if (cur_channel->state != CHANNEL_OPENED_STATE) |
|---|
| 1364 | | - continue; |
|---|
| 1365 | | - |
|---|
| 1366 | | - if (cur_channel->target_vp == cur_cpu) |
|---|
| 1367 | | - return cur_channel; |
|---|
| 1368 | | - |
|---|
| 1369 | | - if (i == next_channel) |
|---|
| 1370 | | - return cur_channel; |
|---|
| 1371 | | - |
|---|
| 1372 | | - i++; |
|---|
| 1373 | | - } |
|---|
| 1374 | | - |
|---|
| 1375 | | - return outgoing_channel; |
|---|
| 1376 | | -} |
|---|
| 1377 | | -EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel); |
|---|
| 1378 | 1446 | |
|---|
| 1379 | 1447 | static void invoke_sc_cb(struct vmbus_channel *primary_channel) |
|---|
| 1380 | 1448 | { |
|---|