| .. | .. |
|---|
| 1 | +/* SPDX-License-Identifier: GPL-2.0-or-later */ |
|---|
| 1 | 2 | /* |
|---|
| 2 | 3 | * Implement AES algorithm in Intel AES-NI instructions. |
|---|
| 3 | 4 | * |
|---|
| .. | .. |
|---|
| 22 | 23 | * |
|---|
| 23 | 24 | * Ported x86_64 version to x86: |
|---|
| 24 | 25 | * Author: Mathias Krause <minipli@googlemail.com> |
|---|
| 25 | | - * |
|---|
| 26 | | - * This program is free software; you can redistribute it and/or modify |
|---|
| 27 | | - * it under the terms of the GNU General Public License as published by |
|---|
| 28 | | - * the Free Software Foundation; either version 2 of the License, or |
|---|
| 29 | | - * (at your option) any later version. |
|---|
| 30 | 26 | */ |
|---|
| 31 | 27 | |
|---|
| 32 | 28 | #include <linux/linkage.h> |
|---|
| 33 | | -#include <asm/inst.h> |
|---|
| 34 | 29 | #include <asm/frame.h> |
|---|
| 35 | 30 | #include <asm/nospec-branch.h> |
|---|
| 36 | 31 | |
|---|
| .. | .. |
|---|
| 205 | 200 | mov \SUBKEY, %r12 |
|---|
| 206 | 201 | movdqu (%r12), \TMP3 |
|---|
| 207 | 202 | movdqa SHUF_MASK(%rip), \TMP2 |
|---|
| 208 | | - PSHUFB_XMM \TMP2, \TMP3 |
|---|
| 203 | + pshufb \TMP2, \TMP3 |
|---|
| 209 | 204 | |
|---|
| 210 | 205 | # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) |
|---|
| 211 | 206 | |
|---|
| .. | .. |
|---|
| 267 | 262 | movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv |
|---|
| 268 | 263 | |
|---|
| 269 | 264 | movdqa SHUF_MASK(%rip), %xmm2 |
|---|
| 270 | | - PSHUFB_XMM %xmm2, %xmm0 |
|---|
| 265 | + pshufb %xmm2, %xmm0 |
|---|
| 271 | 266 | movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv |
|---|
| 272 | 267 | |
|---|
| 273 | 268 | PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7 |
|---|
| .. | .. |
|---|
| 323 | 318 | |
|---|
| 324 | 319 | # Main loop - Encrypt/Decrypt remaining blocks |
|---|
| 325 | 320 | |
|---|
| 326 | | - cmp $0, %r13 |
|---|
| 321 | + test %r13, %r13 |
|---|
| 327 | 322 | je _zero_cipher_left_\@ |
|---|
| 328 | 323 | sub $64, %r13 |
|---|
| 329 | 324 | je _four_cipher_left_\@ |
|---|
| .. | .. |
|---|
| 351 | 346 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn |
|---|
| 352 | 347 | movdqu %xmm0, CurCount(%arg2) |
|---|
| 353 | 348 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 354 | | - PSHUFB_XMM %xmm10, %xmm0 |
|---|
| 349 | + pshufb %xmm10, %xmm0 |
|---|
| 355 | 350 | |
|---|
| 356 | 351 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) |
|---|
| 357 | 352 | movdqu %xmm0, PBlockEncKey(%arg2) |
|---|
| .. | .. |
|---|
| 381 | 376 | # get the appropriate shuffle mask |
|---|
| 382 | 377 | movdqu (%r12), %xmm2 |
|---|
| 383 | 378 | # shift right 16-r13 bytes |
|---|
| 384 | | - PSHUFB_XMM %xmm2, %xmm1 |
|---|
| 379 | + pshufb %xmm2, %xmm1 |
|---|
| 385 | 380 | |
|---|
| 386 | 381 | _data_read_\@: |
|---|
| 387 | 382 | lea ALL_F+16(%rip), %r12 |
|---|
| .. | .. |
|---|
| 397 | 392 | .ifc \operation, dec |
|---|
| 398 | 393 | pand %xmm1, %xmm2 |
|---|
| 399 | 394 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 400 | | - PSHUFB_XMM %xmm10 ,%xmm2 |
|---|
| 395 | + pshufb %xmm10 ,%xmm2 |
|---|
| 401 | 396 | |
|---|
| 402 | 397 | pxor %xmm2, %xmm8 |
|---|
| 403 | 398 | .else |
|---|
| 404 | 399 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 405 | | - PSHUFB_XMM %xmm10,%xmm0 |
|---|
| 400 | + pshufb %xmm10,%xmm0 |
|---|
| 406 | 401 | |
|---|
| 407 | 402 | pxor %xmm0, %xmm8 |
|---|
| 408 | 403 | .endif |
|---|
| .. | .. |
|---|
| 412 | 407 | # GHASH computation for the last <16 byte block |
|---|
| 413 | 408 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 414 | 409 | # shuffle xmm0 back to output as ciphertext |
|---|
| 415 | | - PSHUFB_XMM %xmm10, %xmm0 |
|---|
| 410 | + pshufb %xmm10, %xmm0 |
|---|
| 416 | 411 | .endif |
|---|
| 417 | 412 | |
|---|
| 418 | 413 | # Output %r13 bytes |
|---|
| 419 | | - MOVQ_R64_XMM %xmm0, %rax |
|---|
| 414 | + movq %xmm0, %rax |
|---|
| 420 | 415 | cmp $8, %r13 |
|---|
| 421 | 416 | jle _less_than_8_bytes_left_\@ |
|---|
| 422 | 417 | mov %rax, (%arg3 , %r11, 1) |
|---|
| 423 | 418 | add $8, %r11 |
|---|
| 424 | 419 | psrldq $8, %xmm0 |
|---|
| 425 | | - MOVQ_R64_XMM %xmm0, %rax |
|---|
| 420 | + movq %xmm0, %rax |
|---|
| 426 | 421 | sub $8, %r13 |
|---|
| 427 | 422 | _less_than_8_bytes_left_\@: |
|---|
| 428 | 423 | mov %al, (%arg3, %r11, 1) |
|---|
| .. | .. |
|---|
| 442 | 437 | |
|---|
| 443 | 438 | mov PBlockLen(%arg2), %r12 |
|---|
| 444 | 439 | |
|---|
| 445 | | - cmp $0, %r12 |
|---|
| 440 | + test %r12, %r12 |
|---|
| 446 | 441 | je _partial_done\@ |
|---|
| 447 | 442 | |
|---|
| 448 | 443 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 |
|---|
| .. | .. |
|---|
| 453 | 448 | movd %r12d, %xmm15 # len(A) in %xmm15 |
|---|
| 454 | 449 | mov InLen(%arg2), %r12 |
|---|
| 455 | 450 | shl $3, %r12 # len(C) in bits (*8) |
|---|
| 456 | | - MOVQ_R64_XMM %r12, %xmm1 |
|---|
| 451 | + movq %r12, %xmm1 |
|---|
| 457 | 452 | |
|---|
| 458 | 453 | pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 |
|---|
| 459 | 454 | pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) |
|---|
| .. | .. |
|---|
| 461 | 456 | GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 |
|---|
| 462 | 457 | # final GHASH computation |
|---|
| 463 | 458 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 464 | | - PSHUFB_XMM %xmm10, %xmm8 |
|---|
| 459 | + pshufb %xmm10, %xmm8 |
|---|
| 465 | 460 | |
|---|
| 466 | 461 | movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0 |
|---|
| 467 | 462 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) |
|---|
| .. | .. |
|---|
| 474 | 469 | cmp $8, %r11 |
|---|
| 475 | 470 | jl _T_4_\@ |
|---|
| 476 | 471 | _T_8_\@: |
|---|
| 477 | | - MOVQ_R64_XMM %xmm0, %rax |
|---|
| 472 | + movq %xmm0, %rax |
|---|
| 478 | 473 | mov %rax, (%r10) |
|---|
| 479 | 474 | add $8, %r10 |
|---|
| 480 | 475 | sub $8, %r11 |
|---|
| 481 | 476 | psrldq $8, %xmm0 |
|---|
| 482 | | - cmp $0, %r11 |
|---|
| 477 | + test %r11, %r11 |
|---|
| 483 | 478 | je _return_T_done_\@ |
|---|
| 484 | 479 | _T_4_\@: |
|---|
| 485 | 480 | movd %xmm0, %eax |
|---|
| .. | .. |
|---|
| 487 | 482 | add $4, %r10 |
|---|
| 488 | 483 | sub $4, %r11 |
|---|
| 489 | 484 | psrldq $4, %xmm0 |
|---|
| 490 | | - cmp $0, %r11 |
|---|
| 485 | + test %r11, %r11 |
|---|
| 491 | 486 | je _return_T_done_\@ |
|---|
| 492 | 487 | _T_123_\@: |
|---|
| 493 | 488 | movd %xmm0, %eax |
|---|
| .. | .. |
|---|
| 522 | 517 | pshufd $78, \HK, \TMP3 |
|---|
| 523 | 518 | pxor \GH, \TMP2 # TMP2 = a1+a0 |
|---|
| 524 | 519 | pxor \HK, \TMP3 # TMP3 = b1+b0 |
|---|
| 525 | | - PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1 |
|---|
| 526 | | - PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0 |
|---|
| 527 | | - PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) |
|---|
| 520 | + pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1 |
|---|
| 521 | + pclmulqdq $0x00, \HK, \GH # GH = a0*b0 |
|---|
| 522 | + pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) |
|---|
| 528 | 523 | pxor \GH, \TMP2 |
|---|
| 529 | 524 | pxor \TMP1, \TMP2 # TMP2 = (a0*b1)+(a1*b0) |
|---|
| 530 | 525 | movdqa \TMP2, \TMP3 |
|---|
| .. | .. |
|---|
| 574 | 569 | cmp $8, \DLEN |
|---|
| 575 | 570 | jl _read_lt8_\@ |
|---|
| 576 | 571 | mov (\DPTR), %rax |
|---|
| 577 | | - MOVQ_R64_XMM %rax, \XMMDst |
|---|
| 572 | + movq %rax, \XMMDst |
|---|
| 578 | 573 | sub $8, \DLEN |
|---|
| 579 | 574 | jz _done_read_partial_block_\@ |
|---|
| 580 | 575 | xor %eax, %eax |
|---|
| .. | .. |
|---|
| 583 | 578 | mov 7(\DPTR, \DLEN, 1), %al |
|---|
| 584 | 579 | dec \DLEN |
|---|
| 585 | 580 | jnz _read_next_byte_\@ |
|---|
| 586 | | - MOVQ_R64_XMM %rax, \XMM1 |
|---|
| 581 | + movq %rax, \XMM1 |
|---|
| 587 | 582 | pslldq $8, \XMM1 |
|---|
| 588 | 583 | por \XMM1, \XMMDst |
|---|
| 589 | 584 | jmp _done_read_partial_block_\@ |
|---|
| .. | .. |
|---|
| 594 | 589 | mov -1(\DPTR, \DLEN, 1), %al |
|---|
| 595 | 590 | dec \DLEN |
|---|
| 596 | 591 | jnz _read_next_byte_lt8_\@ |
|---|
| 597 | | - MOVQ_R64_XMM %rax, \XMMDst |
|---|
| 592 | + movq %rax, \XMMDst |
|---|
| 598 | 593 | _done_read_partial_block_\@: |
|---|
| 599 | 594 | .endm |
|---|
| 600 | 595 | |
|---|
| .. | .. |
|---|
| 612 | 607 | jl _get_AAD_rest\@ |
|---|
| 613 | 608 | _get_AAD_blocks\@: |
|---|
| 614 | 609 | movdqu (%r10), \TMP7 |
|---|
| 615 | | - PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data |
|---|
| 610 | + pshufb %xmm14, \TMP7 # byte-reflect the AAD data |
|---|
| 616 | 611 | pxor \TMP7, \TMP6 |
|---|
| 617 | 612 | GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 |
|---|
| 618 | 613 | add $16, %r10 |
|---|
| .. | .. |
|---|
| 624 | 619 | |
|---|
| 625 | 620 | /* read the last <16B of AAD */ |
|---|
| 626 | 621 | _get_AAD_rest\@: |
|---|
| 627 | | - cmp $0, %r11 |
|---|
| 622 | + test %r11, %r11 |
|---|
| 628 | 623 | je _get_AAD_done\@ |
|---|
| 629 | 624 | |
|---|
| 630 | 625 | READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 |
|---|
| 631 | | - PSHUFB_XMM %xmm14, \TMP7 # byte-reflect the AAD data |
|---|
| 626 | + pshufb %xmm14, \TMP7 # byte-reflect the AAD data |
|---|
| 632 | 627 | pxor \TMP6, \TMP7 |
|---|
| 633 | 628 | GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5 |
|---|
| 634 | 629 | movdqu \TMP7, \TMP6 |
|---|
| .. | .. |
|---|
| 645 | 640 | .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ |
|---|
| 646 | 641 | AAD_HASH operation |
|---|
| 647 | 642 | mov PBlockLen(%arg2), %r13 |
|---|
| 648 | | - cmp $0, %r13 |
|---|
| 643 | + test %r13, %r13 |
|---|
| 649 | 644 | je _partial_block_done_\@ # Leave Macro if no partial blocks |
|---|
| 650 | 645 | # Read in input data without over reading |
|---|
| 651 | 646 | cmp $16, \PLAIN_CYPH_LEN |
|---|
| .. | .. |
|---|
| 671 | 666 | # r16-r13 is the number of bytes in plaintext mod 16) |
|---|
| 672 | 667 | add %r13, %r12 |
|---|
| 673 | 668 | movdqu (%r12), %xmm2 # get the appropriate shuffle mask |
|---|
| 674 | | - PSHUFB_XMM %xmm2, %xmm9 # shift right r13 bytes |
|---|
| 669 | + pshufb %xmm2, %xmm9 # shift right r13 bytes |
|---|
| 675 | 670 | |
|---|
| 676 | 671 | .ifc \operation, dec |
|---|
| 677 | 672 | movdqa %xmm1, %xmm3 |
|---|
| .. | .. |
|---|
| 693 | 688 | |
|---|
| 694 | 689 | pand %xmm1, %xmm3 |
|---|
| 695 | 690 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 696 | | - PSHUFB_XMM %xmm10, %xmm3 |
|---|
| 697 | | - PSHUFB_XMM %xmm2, %xmm3 |
|---|
| 691 | + pshufb %xmm10, %xmm3 |
|---|
| 692 | + pshufb %xmm2, %xmm3 |
|---|
| 698 | 693 | pxor %xmm3, \AAD_HASH |
|---|
| 699 | 694 | |
|---|
| 700 | | - cmp $0, %r10 |
|---|
| 695 | + test %r10, %r10 |
|---|
| 701 | 696 | jl _partial_incomplete_1_\@ |
|---|
| 702 | 697 | |
|---|
| 703 | 698 | # GHASH computation for the last <16 Byte block |
|---|
| .. | .. |
|---|
| 728 | 723 | pand %xmm1, %xmm9 |
|---|
| 729 | 724 | |
|---|
| 730 | 725 | movdqa SHUF_MASK(%rip), %xmm1 |
|---|
| 731 | | - PSHUFB_XMM %xmm1, %xmm9 |
|---|
| 732 | | - PSHUFB_XMM %xmm2, %xmm9 |
|---|
| 726 | + pshufb %xmm1, %xmm9 |
|---|
| 727 | + pshufb %xmm2, %xmm9 |
|---|
| 733 | 728 | pxor %xmm9, \AAD_HASH |
|---|
| 734 | 729 | |
|---|
| 735 | | - cmp $0, %r10 |
|---|
| 730 | + test %r10, %r10 |
|---|
| 736 | 731 | jl _partial_incomplete_2_\@ |
|---|
| 737 | 732 | |
|---|
| 738 | 733 | # GHASH computation for the last <16 Byte block |
|---|
| .. | .. |
|---|
| 748 | 743 | |
|---|
| 749 | 744 | movdqa SHUF_MASK(%rip), %xmm10 |
|---|
| 750 | 745 | # shuffle xmm9 back to output as ciphertext |
|---|
| 751 | | - PSHUFB_XMM %xmm10, %xmm9 |
|---|
| 752 | | - PSHUFB_XMM %xmm2, %xmm9 |
|---|
| 746 | + pshufb %xmm10, %xmm9 |
|---|
| 747 | + pshufb %xmm2, %xmm9 |
|---|
| 753 | 748 | .endif |
|---|
| 754 | 749 | # output encrypted Bytes |
|---|
| 755 | | - cmp $0, %r10 |
|---|
| 750 | + test %r10, %r10 |
|---|
| 756 | 751 | jl _partial_fill_\@ |
|---|
| 757 | 752 | mov %r13, %r12 |
|---|
| 758 | 753 | mov $16, %r13 |
|---|
| .. | .. |
|---|
| 763 | 758 | mov \PLAIN_CYPH_LEN, %r13 |
|---|
| 764 | 759 | _count_set_\@: |
|---|
| 765 | 760 | movdqa %xmm9, %xmm0 |
|---|
| 766 | | - MOVQ_R64_XMM %xmm0, %rax |
|---|
| 761 | + movq %xmm0, %rax |
|---|
| 767 | 762 | cmp $8, %r13 |
|---|
| 768 | 763 | jle _less_than_8_bytes_left_\@ |
|---|
| 769 | 764 | |
|---|
| 770 | 765 | mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) |
|---|
| 771 | 766 | add $8, \DATA_OFFSET |
|---|
| 772 | 767 | psrldq $8, %xmm0 |
|---|
| 773 | | - MOVQ_R64_XMM %xmm0, %rax |
|---|
| 768 | + movq %xmm0, %rax |
|---|
| 774 | 769 | sub $8, %r13 |
|---|
| 775 | 770 | _less_than_8_bytes_left_\@: |
|---|
| 776 | 771 | movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) |
|---|
| .. | .. |
|---|
| 814 | 809 | .else |
|---|
| 815 | 810 | MOVADQ \XMM0, %xmm\index |
|---|
| 816 | 811 | .endif |
|---|
| 817 | | - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap |
|---|
| 812 | + pshufb %xmm14, %xmm\index # perform a 16 byte swap |
|---|
| 818 | 813 | pxor \TMP2, %xmm\index |
|---|
| 819 | 814 | .endr |
|---|
| 820 | 815 | lea 0x10(%arg1),%r10 |
|---|
| .. | .. |
|---|
| 825 | 820 | aes_loop_initial_\@: |
|---|
| 826 | 821 | MOVADQ (%r10),\TMP1 |
|---|
| 827 | 822 | .irpc index, \i_seq |
|---|
| 828 | | - AESENC \TMP1, %xmm\index |
|---|
| 823 | + aesenc \TMP1, %xmm\index |
|---|
| 829 | 824 | .endr |
|---|
| 830 | 825 | add $16,%r10 |
|---|
| 831 | 826 | sub $1,%eax |
|---|
| .. | .. |
|---|
| 833 | 828 | |
|---|
| 834 | 829 | MOVADQ (%r10), \TMP1 |
|---|
| 835 | 830 | .irpc index, \i_seq |
|---|
| 836 | | - AESENCLAST \TMP1, %xmm\index # Last Round |
|---|
| 831 | + aesenclast \TMP1, %xmm\index # Last Round |
|---|
| 837 | 832 | .endr |
|---|
| 838 | 833 | .irpc index, \i_seq |
|---|
| 839 | 834 | movdqu (%arg4 , %r11, 1), \TMP1 |
|---|
| .. | .. |
|---|
| 845 | 840 | .ifc \operation, dec |
|---|
| 846 | 841 | movdqa \TMP1, %xmm\index |
|---|
| 847 | 842 | .endif |
|---|
| 848 | | - PSHUFB_XMM %xmm14, %xmm\index |
|---|
| 843 | + pshufb %xmm14, %xmm\index |
|---|
| 849 | 844 | |
|---|
| 850 | 845 | # prepare plaintext/ciphertext for GHASH computation |
|---|
| 851 | 846 | .endr |
|---|
| .. | .. |
|---|
| 880 | 875 | MOVADQ ONE(%RIP),\TMP1 |
|---|
| 881 | 876 | paddd \TMP1, \XMM0 # INCR Y0 |
|---|
| 882 | 877 | MOVADQ \XMM0, \XMM1 |
|---|
| 883 | | - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap |
|---|
| 878 | + pshufb %xmm14, \XMM1 # perform a 16 byte swap |
|---|
| 884 | 879 | |
|---|
| 885 | 880 | paddd \TMP1, \XMM0 # INCR Y0 |
|---|
| 886 | 881 | MOVADQ \XMM0, \XMM2 |
|---|
| 887 | | - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
|---|
| 882 | + pshufb %xmm14, \XMM2 # perform a 16 byte swap |
|---|
| 888 | 883 | |
|---|
| 889 | 884 | paddd \TMP1, \XMM0 # INCR Y0 |
|---|
| 890 | 885 | MOVADQ \XMM0, \XMM3 |
|---|
| 891 | | - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap |
|---|
| 886 | + pshufb %xmm14, \XMM3 # perform a 16 byte swap |
|---|
| 892 | 887 | |
|---|
| 893 | 888 | paddd \TMP1, \XMM0 # INCR Y0 |
|---|
| 894 | 889 | MOVADQ \XMM0, \XMM4 |
|---|
| 895 | | - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap |
|---|
| 890 | + pshufb %xmm14, \XMM4 # perform a 16 byte swap |
|---|
| 896 | 891 | |
|---|
| 897 | 892 | MOVADQ 0(%arg1),\TMP1 |
|---|
| 898 | 893 | pxor \TMP1, \XMM1 |
|---|
| .. | .. |
|---|
| 901 | 896 | pxor \TMP1, \XMM4 |
|---|
| 902 | 897 | .irpc index, 1234 # do 4 rounds |
|---|
| 903 | 898 | movaps 0x10*\index(%arg1), \TMP1 |
|---|
| 904 | | - AESENC \TMP1, \XMM1 |
|---|
| 905 | | - AESENC \TMP1, \XMM2 |
|---|
| 906 | | - AESENC \TMP1, \XMM3 |
|---|
| 907 | | - AESENC \TMP1, \XMM4 |
|---|
| 899 | + aesenc \TMP1, \XMM1 |
|---|
| 900 | + aesenc \TMP1, \XMM2 |
|---|
| 901 | + aesenc \TMP1, \XMM3 |
|---|
| 902 | + aesenc \TMP1, \XMM4 |
|---|
| 908 | 903 | .endr |
|---|
| 909 | 904 | .irpc index, 56789 # do next 5 rounds |
|---|
| 910 | 905 | movaps 0x10*\index(%arg1), \TMP1 |
|---|
| 911 | | - AESENC \TMP1, \XMM1 |
|---|
| 912 | | - AESENC \TMP1, \XMM2 |
|---|
| 913 | | - AESENC \TMP1, \XMM3 |
|---|
| 914 | | - AESENC \TMP1, \XMM4 |
|---|
| 906 | + aesenc \TMP1, \XMM1 |
|---|
| 907 | + aesenc \TMP1, \XMM2 |
|---|
| 908 | + aesenc \TMP1, \XMM3 |
|---|
| 909 | + aesenc \TMP1, \XMM4 |
|---|
| 915 | 910 | .endr |
|---|
| 916 | 911 | lea 0xa0(%arg1),%r10 |
|---|
| 917 | 912 | mov keysize,%eax |
|---|
| .. | .. |
|---|
| 922 | 917 | aes_loop_pre_\@: |
|---|
| 923 | 918 | MOVADQ (%r10),\TMP2 |
|---|
| 924 | 919 | .irpc index, 1234 |
|---|
| 925 | | - AESENC \TMP2, %xmm\index |
|---|
| 920 | + aesenc \TMP2, %xmm\index |
|---|
| 926 | 921 | .endr |
|---|
| 927 | 922 | add $16,%r10 |
|---|
| 928 | 923 | sub $1,%eax |
|---|
| .. | .. |
|---|
| 930 | 925 | |
|---|
| 931 | 926 | aes_loop_pre_done\@: |
|---|
| 932 | 927 | MOVADQ (%r10), \TMP2 |
|---|
| 933 | | - AESENCLAST \TMP2, \XMM1 |
|---|
| 934 | | - AESENCLAST \TMP2, \XMM2 |
|---|
| 935 | | - AESENCLAST \TMP2, \XMM3 |
|---|
| 936 | | - AESENCLAST \TMP2, \XMM4 |
|---|
| 928 | + aesenclast \TMP2, \XMM1 |
|---|
| 929 | + aesenclast \TMP2, \XMM2 |
|---|
| 930 | + aesenclast \TMP2, \XMM3 |
|---|
| 931 | + aesenclast \TMP2, \XMM4 |
|---|
| 937 | 932 | movdqu 16*0(%arg4 , %r11 , 1), \TMP1 |
|---|
| 938 | 933 | pxor \TMP1, \XMM1 |
|---|
| 939 | 934 | .ifc \operation, dec |
|---|
| .. | .. |
|---|
| 965 | 960 | .endif |
|---|
| 966 | 961 | |
|---|
| 967 | 962 | add $64, %r11 |
|---|
| 968 | | - PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap |
|---|
| 963 | + pshufb %xmm14, \XMM1 # perform a 16 byte swap |
|---|
| 969 | 964 | pxor \XMMDst, \XMM1 |
|---|
| 970 | 965 | # combine GHASHed value with the corresponding ciphertext |
|---|
| 971 | | - PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap |
|---|
| 972 | | - PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap |
|---|
| 973 | | - PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap |
|---|
| 966 | + pshufb %xmm14, \XMM2 # perform a 16 byte swap |
|---|
| 967 | + pshufb %xmm14, \XMM3 # perform a 16 byte swap |
|---|
| 968 | + pshufb %xmm14, \XMM4 # perform a 16 byte swap |
|---|
| 974 | 969 | |
|---|
| 975 | 970 | _initial_blocks_done\@: |
|---|
| 976 | 971 | |
|---|
| .. | .. |
|---|
| 998 | 993 | pxor \XMM5, \TMP6 |
|---|
| 999 | 994 | paddd ONE(%rip), \XMM0 # INCR CNT |
|---|
| 1000 | 995 | movdqu HashKey_4(%arg2), \TMP5 |
|---|
| 1001 | | - PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 |
|---|
| 996 | + pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 |
|---|
| 1002 | 997 | movdqa \XMM0, \XMM1 |
|---|
| 1003 | 998 | paddd ONE(%rip), \XMM0 # INCR CNT |
|---|
| 1004 | 999 | movdqa \XMM0, \XMM2 |
|---|
| .. | .. |
|---|
| 1006 | 1001 | movdqa \XMM0, \XMM3 |
|---|
| 1007 | 1002 | paddd ONE(%rip), \XMM0 # INCR CNT |
|---|
| 1008 | 1003 | movdqa \XMM0, \XMM4 |
|---|
| 1009 | | - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1010 | | - PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 |
|---|
| 1011 | | - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1012 | | - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1013 | | - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1004 | + pshufb %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1005 | + pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 |
|---|
| 1006 | + pshufb %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1007 | + pshufb %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1008 | + pshufb %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1014 | 1009 | |
|---|
| 1015 | 1010 | pxor (%arg1), \XMM1 |
|---|
| 1016 | 1011 | pxor (%arg1), \XMM2 |
|---|
| 1017 | 1012 | pxor (%arg1), \XMM3 |
|---|
| 1018 | 1013 | pxor (%arg1), \XMM4 |
|---|
| 1019 | 1014 | movdqu HashKey_4_k(%arg2), \TMP5 |
|---|
| 1020 | | - PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) |
|---|
| 1015 | + pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) |
|---|
| 1021 | 1016 | movaps 0x10(%arg1), \TMP1 |
|---|
| 1022 | | - AESENC \TMP1, \XMM1 # Round 1 |
|---|
| 1023 | | - AESENC \TMP1, \XMM2 |
|---|
| 1024 | | - AESENC \TMP1, \XMM3 |
|---|
| 1025 | | - AESENC \TMP1, \XMM4 |
|---|
| 1017 | + aesenc \TMP1, \XMM1 # Round 1 |
|---|
| 1018 | + aesenc \TMP1, \XMM2 |
|---|
| 1019 | + aesenc \TMP1, \XMM3 |
|---|
| 1020 | + aesenc \TMP1, \XMM4 |
|---|
| 1026 | 1021 | movaps 0x20(%arg1), \TMP1 |
|---|
| 1027 | | - AESENC \TMP1, \XMM1 # Round 2 |
|---|
| 1028 | | - AESENC \TMP1, \XMM2 |
|---|
| 1029 | | - AESENC \TMP1, \XMM3 |
|---|
| 1030 | | - AESENC \TMP1, \XMM4 |
|---|
| 1022 | + aesenc \TMP1, \XMM1 # Round 2 |
|---|
| 1023 | + aesenc \TMP1, \XMM2 |
|---|
| 1024 | + aesenc \TMP1, \XMM3 |
|---|
| 1025 | + aesenc \TMP1, \XMM4 |
|---|
| 1031 | 1026 | movdqa \XMM6, \TMP1 |
|---|
| 1032 | 1027 | pshufd $78, \XMM6, \TMP2 |
|---|
| 1033 | 1028 | pxor \XMM6, \TMP2 |
|---|
| 1034 | 1029 | movdqu HashKey_3(%arg2), \TMP5 |
|---|
| 1035 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 |
|---|
| 1030 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 |
|---|
| 1036 | 1031 | movaps 0x30(%arg1), \TMP3 |
|---|
| 1037 | | - AESENC \TMP3, \XMM1 # Round 3 |
|---|
| 1038 | | - AESENC \TMP3, \XMM2 |
|---|
| 1039 | | - AESENC \TMP3, \XMM3 |
|---|
| 1040 | | - AESENC \TMP3, \XMM4 |
|---|
| 1041 | | - PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 |
|---|
| 1032 | + aesenc \TMP3, \XMM1 # Round 3 |
|---|
| 1033 | + aesenc \TMP3, \XMM2 |
|---|
| 1034 | + aesenc \TMP3, \XMM3 |
|---|
| 1035 | + aesenc \TMP3, \XMM4 |
|---|
| 1036 | + pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 |
|---|
| 1042 | 1037 | movaps 0x40(%arg1), \TMP3 |
|---|
| 1043 | | - AESENC \TMP3, \XMM1 # Round 4 |
|---|
| 1044 | | - AESENC \TMP3, \XMM2 |
|---|
| 1045 | | - AESENC \TMP3, \XMM3 |
|---|
| 1046 | | - AESENC \TMP3, \XMM4 |
|---|
| 1038 | + aesenc \TMP3, \XMM1 # Round 4 |
|---|
| 1039 | + aesenc \TMP3, \XMM2 |
|---|
| 1040 | + aesenc \TMP3, \XMM3 |
|---|
| 1041 | + aesenc \TMP3, \XMM4 |
|---|
| 1047 | 1042 | movdqu HashKey_3_k(%arg2), \TMP5 |
|---|
| 1048 | | - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1043 | + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1049 | 1044 | movaps 0x50(%arg1), \TMP3 |
|---|
| 1050 | | - AESENC \TMP3, \XMM1 # Round 5 |
|---|
| 1051 | | - AESENC \TMP3, \XMM2 |
|---|
| 1052 | | - AESENC \TMP3, \XMM3 |
|---|
| 1053 | | - AESENC \TMP3, \XMM4 |
|---|
| 1045 | + aesenc \TMP3, \XMM1 # Round 5 |
|---|
| 1046 | + aesenc \TMP3, \XMM2 |
|---|
| 1047 | + aesenc \TMP3, \XMM3 |
|---|
| 1048 | + aesenc \TMP3, \XMM4 |
|---|
| 1054 | 1049 | pxor \TMP1, \TMP4 |
|---|
| 1055 | 1050 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part |
|---|
| 1056 | 1051 | pxor \XMM6, \XMM5 |
|---|
| .. | .. |
|---|
| 1062 | 1057 | |
|---|
| 1063 | 1058 | # Multiply TMP5 * HashKey using karatsuba |
|---|
| 1064 | 1059 | |
|---|
| 1065 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1060 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1066 | 1061 | movaps 0x60(%arg1), \TMP3 |
|---|
| 1067 | | - AESENC \TMP3, \XMM1 # Round 6 |
|---|
| 1068 | | - AESENC \TMP3, \XMM2 |
|---|
| 1069 | | - AESENC \TMP3, \XMM3 |
|---|
| 1070 | | - AESENC \TMP3, \XMM4 |
|---|
| 1071 | | - PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 |
|---|
| 1062 | + aesenc \TMP3, \XMM1 # Round 6 |
|---|
| 1063 | + aesenc \TMP3, \XMM2 |
|---|
| 1064 | + aesenc \TMP3, \XMM3 |
|---|
| 1065 | + aesenc \TMP3, \XMM4 |
|---|
| 1066 | + pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 |
|---|
| 1072 | 1067 | movaps 0x70(%arg1), \TMP3 |
|---|
| 1073 | | - AESENC \TMP3, \XMM1 # Round 7 |
|---|
| 1074 | | - AESENC \TMP3, \XMM2 |
|---|
| 1075 | | - AESENC \TMP3, \XMM3 |
|---|
| 1076 | | - AESENC \TMP3, \XMM4 |
|---|
| 1068 | + aesenc \TMP3, \XMM1 # Round 7 |
|---|
| 1069 | + aesenc \TMP3, \XMM2 |
|---|
| 1070 | + aesenc \TMP3, \XMM3 |
|---|
| 1071 | + aesenc \TMP3, \XMM4 |
|---|
| 1077 | 1072 | movdqu HashKey_2_k(%arg2), \TMP5 |
|---|
| 1078 | | - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1073 | + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1079 | 1074 | movaps 0x80(%arg1), \TMP3 |
|---|
| 1080 | | - AESENC \TMP3, \XMM1 # Round 8 |
|---|
| 1081 | | - AESENC \TMP3, \XMM2 |
|---|
| 1082 | | - AESENC \TMP3, \XMM3 |
|---|
| 1083 | | - AESENC \TMP3, \XMM4 |
|---|
| 1075 | + aesenc \TMP3, \XMM1 # Round 8 |
|---|
| 1076 | + aesenc \TMP3, \XMM2 |
|---|
| 1077 | + aesenc \TMP3, \XMM3 |
|---|
| 1078 | + aesenc \TMP3, \XMM4 |
|---|
| 1084 | 1079 | pxor \TMP1, \TMP4 |
|---|
| 1085 | 1080 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part |
|---|
| 1086 | 1081 | pxor \XMM7, \XMM5 |
|---|
| .. | .. |
|---|
| 1093 | 1088 | pshufd $78, \XMM8, \TMP2 |
|---|
| 1094 | 1089 | pxor \XMM8, \TMP2 |
|---|
| 1095 | 1090 | movdqu HashKey(%arg2), \TMP5 |
|---|
| 1096 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1091 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1097 | 1092 | movaps 0x90(%arg1), \TMP3 |
|---|
| 1098 | | - AESENC \TMP3, \XMM1 # Round 9 |
|---|
| 1099 | | - AESENC \TMP3, \XMM2 |
|---|
| 1100 | | - AESENC \TMP3, \XMM3 |
|---|
| 1101 | | - AESENC \TMP3, \XMM4 |
|---|
| 1102 | | - PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 |
|---|
| 1093 | + aesenc \TMP3, \XMM1 # Round 9 |
|---|
| 1094 | + aesenc \TMP3, \XMM2 |
|---|
| 1095 | + aesenc \TMP3, \XMM3 |
|---|
| 1096 | + aesenc \TMP3, \XMM4 |
|---|
| 1097 | + pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 |
|---|
| 1103 | 1098 | lea 0xa0(%arg1),%r10 |
|---|
| 1104 | 1099 | mov keysize,%eax |
|---|
| 1105 | 1100 | shr $2,%eax # 128->4, 192->6, 256->8 |
|---|
| .. | .. |
|---|
| 1109 | 1104 | aes_loop_par_enc\@: |
|---|
| 1110 | 1105 | MOVADQ (%r10),\TMP3 |
|---|
| 1111 | 1106 | .irpc index, 1234 |
|---|
| 1112 | | - AESENC \TMP3, %xmm\index |
|---|
| 1107 | + aesenc \TMP3, %xmm\index |
|---|
| 1113 | 1108 | .endr |
|---|
| 1114 | 1109 | add $16,%r10 |
|---|
| 1115 | 1110 | sub $1,%eax |
|---|
| .. | .. |
|---|
| 1117 | 1112 | |
|---|
| 1118 | 1113 | aes_loop_par_enc_done\@: |
|---|
| 1119 | 1114 | MOVADQ (%r10), \TMP3 |
|---|
| 1120 | | - AESENCLAST \TMP3, \XMM1 # Round 10 |
|---|
| 1121 | | - AESENCLAST \TMP3, \XMM2 |
|---|
| 1122 | | - AESENCLAST \TMP3, \XMM3 |
|---|
| 1123 | | - AESENCLAST \TMP3, \XMM4 |
|---|
| 1115 | + aesenclast \TMP3, \XMM1 # Round 10 |
|---|
| 1116 | + aesenclast \TMP3, \XMM2 |
|---|
| 1117 | + aesenclast \TMP3, \XMM3 |
|---|
| 1118 | + aesenclast \TMP3, \XMM4 |
|---|
| 1124 | 1119 | movdqu HashKey_k(%arg2), \TMP5 |
|---|
| 1125 | | - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1120 | + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1126 | 1121 | movdqu (%arg4,%r11,1), \TMP3 |
|---|
| 1127 | 1122 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK |
|---|
| 1128 | 1123 | movdqu 16(%arg4,%r11,1), \TMP3 |
|---|
| .. | .. |
|---|
| 1135 | 1130 | movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer |
|---|
| 1136 | 1131 | movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer |
|---|
| 1137 | 1132 | movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer |
|---|
| 1138 | | - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1139 | | - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1140 | | - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1141 | | - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1133 | + pshufb %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1134 | + pshufb %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1135 | + pshufb %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1136 | + pshufb %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1142 | 1137 | |
|---|
| 1143 | 1138 | pxor \TMP4, \TMP1 |
|---|
| 1144 | 1139 | pxor \XMM8, \XMM5 |
|---|
| .. | .. |
|---|
| 1206 | 1201 | pxor \XMM5, \TMP6 |
|---|
| 1207 | 1202 | paddd ONE(%rip), \XMM0 # INCR CNT |
|---|
| 1208 | 1203 | movdqu HashKey_4(%arg2), \TMP5 |
|---|
| 1209 | | - PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 |
|---|
| 1204 | + pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1 |
|---|
| 1210 | 1205 | movdqa \XMM0, \XMM1 |
|---|
| 1211 | 1206 | paddd ONE(%rip), \XMM0 # INCR CNT |
|---|
| 1212 | 1207 | movdqa \XMM0, \XMM2 |
|---|
| .. | .. |
|---|
| 1214 | 1209 | movdqa \XMM0, \XMM3 |
|---|
| 1215 | 1210 | paddd ONE(%rip), \XMM0 # INCR CNT |
|---|
| 1216 | 1211 | movdqa \XMM0, \XMM4 |
|---|
| 1217 | | - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1218 | | - PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 |
|---|
| 1219 | | - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1220 | | - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1221 | | - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1212 | + pshufb %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1213 | + pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0 |
|---|
| 1214 | + pshufb %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1215 | + pshufb %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1216 | + pshufb %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1222 | 1217 | |
|---|
| 1223 | 1218 | pxor (%arg1), \XMM1 |
|---|
| 1224 | 1219 | pxor (%arg1), \XMM2 |
|---|
| 1225 | 1220 | pxor (%arg1), \XMM3 |
|---|
| 1226 | 1221 | pxor (%arg1), \XMM4 |
|---|
| 1227 | 1222 | movdqu HashKey_4_k(%arg2), \TMP5 |
|---|
| 1228 | | - PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) |
|---|
| 1223 | + pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) |
|---|
| 1229 | 1224 | movaps 0x10(%arg1), \TMP1 |
|---|
| 1230 | | - AESENC \TMP1, \XMM1 # Round 1 |
|---|
| 1231 | | - AESENC \TMP1, \XMM2 |
|---|
| 1232 | | - AESENC \TMP1, \XMM3 |
|---|
| 1233 | | - AESENC \TMP1, \XMM4 |
|---|
| 1225 | + aesenc \TMP1, \XMM1 # Round 1 |
|---|
| 1226 | + aesenc \TMP1, \XMM2 |
|---|
| 1227 | + aesenc \TMP1, \XMM3 |
|---|
| 1228 | + aesenc \TMP1, \XMM4 |
|---|
| 1234 | 1229 | movaps 0x20(%arg1), \TMP1 |
|---|
| 1235 | | - AESENC \TMP1, \XMM1 # Round 2 |
|---|
| 1236 | | - AESENC \TMP1, \XMM2 |
|---|
| 1237 | | - AESENC \TMP1, \XMM3 |
|---|
| 1238 | | - AESENC \TMP1, \XMM4 |
|---|
| 1230 | + aesenc \TMP1, \XMM1 # Round 2 |
|---|
| 1231 | + aesenc \TMP1, \XMM2 |
|---|
| 1232 | + aesenc \TMP1, \XMM3 |
|---|
| 1233 | + aesenc \TMP1, \XMM4 |
|---|
| 1239 | 1234 | movdqa \XMM6, \TMP1 |
|---|
| 1240 | 1235 | pshufd $78, \XMM6, \TMP2 |
|---|
| 1241 | 1236 | pxor \XMM6, \TMP2 |
|---|
| 1242 | 1237 | movdqu HashKey_3(%arg2), \TMP5 |
|---|
| 1243 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 |
|---|
| 1238 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 |
|---|
| 1244 | 1239 | movaps 0x30(%arg1), \TMP3 |
|---|
| 1245 | | - AESENC \TMP3, \XMM1 # Round 3 |
|---|
| 1246 | | - AESENC \TMP3, \XMM2 |
|---|
| 1247 | | - AESENC \TMP3, \XMM3 |
|---|
| 1248 | | - AESENC \TMP3, \XMM4 |
|---|
| 1249 | | - PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 |
|---|
| 1240 | + aesenc \TMP3, \XMM1 # Round 3 |
|---|
| 1241 | + aesenc \TMP3, \XMM2 |
|---|
| 1242 | + aesenc \TMP3, \XMM3 |
|---|
| 1243 | + aesenc \TMP3, \XMM4 |
|---|
| 1244 | + pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0 |
|---|
| 1250 | 1245 | movaps 0x40(%arg1), \TMP3 |
|---|
| 1251 | | - AESENC \TMP3, \XMM1 # Round 4 |
|---|
| 1252 | | - AESENC \TMP3, \XMM2 |
|---|
| 1253 | | - AESENC \TMP3, \XMM3 |
|---|
| 1254 | | - AESENC \TMP3, \XMM4 |
|---|
| 1246 | + aesenc \TMP3, \XMM1 # Round 4 |
|---|
| 1247 | + aesenc \TMP3, \XMM2 |
|---|
| 1248 | + aesenc \TMP3, \XMM3 |
|---|
| 1249 | + aesenc \TMP3, \XMM4 |
|---|
| 1255 | 1250 | movdqu HashKey_3_k(%arg2), \TMP5 |
|---|
| 1256 | | - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1251 | + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1257 | 1252 | movaps 0x50(%arg1), \TMP3 |
|---|
| 1258 | | - AESENC \TMP3, \XMM1 # Round 5 |
|---|
| 1259 | | - AESENC \TMP3, \XMM2 |
|---|
| 1260 | | - AESENC \TMP3, \XMM3 |
|---|
| 1261 | | - AESENC \TMP3, \XMM4 |
|---|
| 1253 | + aesenc \TMP3, \XMM1 # Round 5 |
|---|
| 1254 | + aesenc \TMP3, \XMM2 |
|---|
| 1255 | + aesenc \TMP3, \XMM3 |
|---|
| 1256 | + aesenc \TMP3, \XMM4 |
|---|
| 1262 | 1257 | pxor \TMP1, \TMP4 |
|---|
| 1263 | 1258 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part |
|---|
| 1264 | 1259 | pxor \XMM6, \XMM5 |
|---|
| .. | .. |
|---|
| 1270 | 1265 | |
|---|
| 1271 | 1266 | # Multiply TMP5 * HashKey using karatsuba |
|---|
| 1272 | 1267 | |
|---|
| 1273 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1268 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1274 | 1269 | movaps 0x60(%arg1), \TMP3 |
|---|
| 1275 | | - AESENC \TMP3, \XMM1 # Round 6 |
|---|
| 1276 | | - AESENC \TMP3, \XMM2 |
|---|
| 1277 | | - AESENC \TMP3, \XMM3 |
|---|
| 1278 | | - AESENC \TMP3, \XMM4 |
|---|
| 1279 | | - PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 |
|---|
| 1270 | + aesenc \TMP3, \XMM1 # Round 6 |
|---|
| 1271 | + aesenc \TMP3, \XMM2 |
|---|
| 1272 | + aesenc \TMP3, \XMM3 |
|---|
| 1273 | + aesenc \TMP3, \XMM4 |
|---|
| 1274 | + pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0 |
|---|
| 1280 | 1275 | movaps 0x70(%arg1), \TMP3 |
|---|
| 1281 | | - AESENC \TMP3, \XMM1 # Round 7 |
|---|
| 1282 | | - AESENC \TMP3, \XMM2 |
|---|
| 1283 | | - AESENC \TMP3, \XMM3 |
|---|
| 1284 | | - AESENC \TMP3, \XMM4 |
|---|
| 1276 | + aesenc \TMP3, \XMM1 # Round 7 |
|---|
| 1277 | + aesenc \TMP3, \XMM2 |
|---|
| 1278 | + aesenc \TMP3, \XMM3 |
|---|
| 1279 | + aesenc \TMP3, \XMM4 |
|---|
| 1285 | 1280 | movdqu HashKey_2_k(%arg2), \TMP5 |
|---|
| 1286 | | - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1281 | + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1287 | 1282 | movaps 0x80(%arg1), \TMP3 |
|---|
| 1288 | | - AESENC \TMP3, \XMM1 # Round 8 |
|---|
| 1289 | | - AESENC \TMP3, \XMM2 |
|---|
| 1290 | | - AESENC \TMP3, \XMM3 |
|---|
| 1291 | | - AESENC \TMP3, \XMM4 |
|---|
| 1283 | + aesenc \TMP3, \XMM1 # Round 8 |
|---|
| 1284 | + aesenc \TMP3, \XMM2 |
|---|
| 1285 | + aesenc \TMP3, \XMM3 |
|---|
| 1286 | + aesenc \TMP3, \XMM4 |
|---|
| 1292 | 1287 | pxor \TMP1, \TMP4 |
|---|
| 1293 | 1288 | # accumulate the results in TMP4:XMM5, TMP6 holds the middle part |
|---|
| 1294 | 1289 | pxor \XMM7, \XMM5 |
|---|
| .. | .. |
|---|
| 1301 | 1296 | pshufd $78, \XMM8, \TMP2 |
|---|
| 1302 | 1297 | pxor \XMM8, \TMP2 |
|---|
| 1303 | 1298 | movdqu HashKey(%arg2), \TMP5 |
|---|
| 1304 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1299 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1305 | 1300 | movaps 0x90(%arg1), \TMP3 |
|---|
| 1306 | | - AESENC \TMP3, \XMM1 # Round 9 |
|---|
| 1307 | | - AESENC \TMP3, \XMM2 |
|---|
| 1308 | | - AESENC \TMP3, \XMM3 |
|---|
| 1309 | | - AESENC \TMP3, \XMM4 |
|---|
| 1310 | | - PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 |
|---|
| 1301 | + aesenc \TMP3, \XMM1 # Round 9 |
|---|
| 1302 | + aesenc \TMP3, \XMM2 |
|---|
| 1303 | + aesenc \TMP3, \XMM3 |
|---|
| 1304 | + aesenc \TMP3, \XMM4 |
|---|
| 1305 | + pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0 |
|---|
| 1311 | 1306 | lea 0xa0(%arg1),%r10 |
|---|
| 1312 | 1307 | mov keysize,%eax |
|---|
| 1313 | 1308 | shr $2,%eax # 128->4, 192->6, 256->8 |
|---|
| .. | .. |
|---|
| 1317 | 1312 | aes_loop_par_dec\@: |
|---|
| 1318 | 1313 | MOVADQ (%r10),\TMP3 |
|---|
| 1319 | 1314 | .irpc index, 1234 |
|---|
| 1320 | | - AESENC \TMP3, %xmm\index |
|---|
| 1315 | + aesenc \TMP3, %xmm\index |
|---|
| 1321 | 1316 | .endr |
|---|
| 1322 | 1317 | add $16,%r10 |
|---|
| 1323 | 1318 | sub $1,%eax |
|---|
| .. | .. |
|---|
| 1325 | 1320 | |
|---|
| 1326 | 1321 | aes_loop_par_dec_done\@: |
|---|
| 1327 | 1322 | MOVADQ (%r10), \TMP3 |
|---|
| 1328 | | - AESENCLAST \TMP3, \XMM1 # last round |
|---|
| 1329 | | - AESENCLAST \TMP3, \XMM2 |
|---|
| 1330 | | - AESENCLAST \TMP3, \XMM3 |
|---|
| 1331 | | - AESENCLAST \TMP3, \XMM4 |
|---|
| 1323 | + aesenclast \TMP3, \XMM1 # last round |
|---|
| 1324 | + aesenclast \TMP3, \XMM2 |
|---|
| 1325 | + aesenclast \TMP3, \XMM3 |
|---|
| 1326 | + aesenclast \TMP3, \XMM4 |
|---|
| 1332 | 1327 | movdqu HashKey_k(%arg2), \TMP5 |
|---|
| 1333 | | - PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1328 | + pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1334 | 1329 | movdqu (%arg4,%r11,1), \TMP3 |
|---|
| 1335 | 1330 | pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK |
|---|
| 1336 | 1331 | movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer |
|---|
| .. | .. |
|---|
| 1347 | 1342 | pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK |
|---|
| 1348 | 1343 | movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer |
|---|
| 1349 | 1344 | movdqa \TMP3, \XMM4 |
|---|
| 1350 | | - PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1351 | | - PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1352 | | - PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1353 | | - PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1345 | + pshufb %xmm15, \XMM1 # perform a 16 byte swap |
|---|
| 1346 | + pshufb %xmm15, \XMM2 # perform a 16 byte swap |
|---|
| 1347 | + pshufb %xmm15, \XMM3 # perform a 16 byte swap |
|---|
| 1348 | + pshufb %xmm15, \XMM4 # perform a 16 byte swap |
|---|
| 1354 | 1349 | |
|---|
| 1355 | 1350 | pxor \TMP4, \TMP1 |
|---|
| 1356 | 1351 | pxor \XMM8, \XMM5 |
|---|
| .. | .. |
|---|
| 1406 | 1401 | pshufd $78, \XMM1, \TMP2 |
|---|
| 1407 | 1402 | pxor \XMM1, \TMP2 |
|---|
| 1408 | 1403 | movdqu HashKey_4(%arg2), \TMP5 |
|---|
| 1409 | | - PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 |
|---|
| 1410 | | - PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 |
|---|
| 1404 | + pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1 |
|---|
| 1405 | + pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0 |
|---|
| 1411 | 1406 | movdqu HashKey_4_k(%arg2), \TMP4 |
|---|
| 1412 | | - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1407 | + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1413 | 1408 | movdqa \XMM1, \XMMDst |
|---|
| 1414 | 1409 | movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 |
|---|
| 1415 | 1410 | |
|---|
| .. | .. |
|---|
| 1419 | 1414 | pshufd $78, \XMM2, \TMP2 |
|---|
| 1420 | 1415 | pxor \XMM2, \TMP2 |
|---|
| 1421 | 1416 | movdqu HashKey_3(%arg2), \TMP5 |
|---|
| 1422 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1423 | | - PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 |
|---|
| 1417 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1418 | + pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0 |
|---|
| 1424 | 1419 | movdqu HashKey_3_k(%arg2), \TMP4 |
|---|
| 1425 | | - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1420 | + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1426 | 1421 | pxor \TMP1, \TMP6 |
|---|
| 1427 | 1422 | pxor \XMM2, \XMMDst |
|---|
| 1428 | 1423 | pxor \TMP2, \XMM1 |
|---|
| .. | .. |
|---|
| 1434 | 1429 | pshufd $78, \XMM3, \TMP2 |
|---|
| 1435 | 1430 | pxor \XMM3, \TMP2 |
|---|
| 1436 | 1431 | movdqu HashKey_2(%arg2), \TMP5 |
|---|
| 1437 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1438 | | - PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 |
|---|
| 1432 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1433 | + pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0 |
|---|
| 1439 | 1434 | movdqu HashKey_2_k(%arg2), \TMP4 |
|---|
| 1440 | | - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1435 | + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1441 | 1436 | pxor \TMP1, \TMP6 |
|---|
| 1442 | 1437 | pxor \XMM3, \XMMDst |
|---|
| 1443 | 1438 | pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 |
|---|
| .. | .. |
|---|
| 1447 | 1442 | pshufd $78, \XMM4, \TMP2 |
|---|
| 1448 | 1443 | pxor \XMM4, \TMP2 |
|---|
| 1449 | 1444 | movdqu HashKey(%arg2), \TMP5 |
|---|
| 1450 | | - PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1451 | | - PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 |
|---|
| 1445 | + pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1 |
|---|
| 1446 | + pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0 |
|---|
| 1452 | 1447 | movdqu HashKey_k(%arg2), \TMP4 |
|---|
| 1453 | | - PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1448 | + pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) |
|---|
| 1454 | 1449 | pxor \TMP1, \TMP6 |
|---|
| 1455 | 1450 | pxor \XMM4, \XMMDst |
|---|
| 1456 | 1451 | pxor \XMM1, \TMP2 |
|---|
| .. | .. |
|---|
| 1508 | 1503 | |
|---|
| 1509 | 1504 | _esb_loop_\@: |
|---|
| 1510 | 1505 | MOVADQ (%r10),\TMP1 |
|---|
| 1511 | | - AESENC \TMP1,\XMM0 |
|---|
| 1506 | + aesenc \TMP1,\XMM0 |
|---|
| 1512 | 1507 | add $16,%r10 |
|---|
| 1513 | 1508 | sub $1,%eax |
|---|
| 1514 | 1509 | jnz _esb_loop_\@ |
|---|
| 1515 | 1510 | |
|---|
| 1516 | 1511 | MOVADQ (%r10),\TMP1 |
|---|
| 1517 | | - AESENCLAST \TMP1,\XMM0 |
|---|
| 1512 | + aesenclast \TMP1,\XMM0 |
|---|
| 1518 | 1513 | .endm |
|---|
| 1519 | 1514 | /***************************************************************************** |
|---|
| 1520 | 1515 | * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. |
|---|
| .. | .. |
|---|
| 1596 | 1591 | * poly = x^128 + x^127 + x^126 + x^121 + 1 |
|---|
| 1597 | 1592 | * |
|---|
| 1598 | 1593 | *****************************************************************************/ |
|---|
| 1599 | | -ENTRY(aesni_gcm_dec) |
|---|
| 1594 | +SYM_FUNC_START(aesni_gcm_dec) |
|---|
| 1600 | 1595 | FUNC_SAVE |
|---|
| 1601 | 1596 | |
|---|
| 1602 | 1597 | GCM_INIT %arg6, arg7, arg8, arg9 |
|---|
| 1603 | 1598 | GCM_ENC_DEC dec |
|---|
| 1604 | 1599 | GCM_COMPLETE arg10, arg11 |
|---|
| 1605 | 1600 | FUNC_RESTORE |
|---|
| 1606 | | - ret |
|---|
| 1607 | | -ENDPROC(aesni_gcm_dec) |
|---|
| 1601 | + RET |
|---|
| 1602 | +SYM_FUNC_END(aesni_gcm_dec) |
|---|
| 1608 | 1603 | |
|---|
| 1609 | 1604 | |
|---|
| 1610 | 1605 | /***************************************************************************** |
|---|
| .. | .. |
|---|
| 1684 | 1679 | * |
|---|
| 1685 | 1680 | * poly = x^128 + x^127 + x^126 + x^121 + 1 |
|---|
| 1686 | 1681 | ***************************************************************************/ |
|---|
| 1687 | | -ENTRY(aesni_gcm_enc) |
|---|
| 1682 | +SYM_FUNC_START(aesni_gcm_enc) |
|---|
| 1688 | 1683 | FUNC_SAVE |
|---|
| 1689 | 1684 | |
|---|
| 1690 | 1685 | GCM_INIT %arg6, arg7, arg8, arg9 |
|---|
| .. | .. |
|---|
| 1692 | 1687 | |
|---|
| 1693 | 1688 | GCM_COMPLETE arg10, arg11 |
|---|
| 1694 | 1689 | FUNC_RESTORE |
|---|
| 1695 | | - ret |
|---|
| 1696 | | -ENDPROC(aesni_gcm_enc) |
|---|
| 1690 | + RET |
|---|
| 1691 | +SYM_FUNC_END(aesni_gcm_enc) |
|---|
| 1697 | 1692 | |
|---|
| 1698 | 1693 | /***************************************************************************** |
|---|
| 1699 | 1694 | * void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. |
|---|
| .. | .. |
|---|
| 1706 | 1701 | * const u8 *aad, // Additional Authentication Data (AAD) |
|---|
| 1707 | 1702 | * u64 aad_len) // Length of AAD in bytes. |
|---|
| 1708 | 1703 | */ |
|---|
| 1709 | | -ENTRY(aesni_gcm_init) |
|---|
| 1704 | +SYM_FUNC_START(aesni_gcm_init) |
|---|
| 1710 | 1705 | FUNC_SAVE |
|---|
| 1711 | 1706 | GCM_INIT %arg3, %arg4,%arg5, %arg6 |
|---|
| 1712 | 1707 | FUNC_RESTORE |
|---|
| 1713 | | - ret |
|---|
| 1714 | | -ENDPROC(aesni_gcm_init) |
|---|
| 1708 | + RET |
|---|
| 1709 | +SYM_FUNC_END(aesni_gcm_init) |
|---|
| 1715 | 1710 | |
|---|
| 1716 | 1711 | /***************************************************************************** |
|---|
| 1717 | 1712 | * void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. |
|---|
| .. | .. |
|---|
| 1721 | 1716 | * const u8 *in, // Plaintext input |
|---|
| 1722 | 1717 | * u64 plaintext_len, // Length of data in bytes for encryption. |
|---|
| 1723 | 1718 | */ |
|---|
| 1724 | | -ENTRY(aesni_gcm_enc_update) |
|---|
| 1719 | +SYM_FUNC_START(aesni_gcm_enc_update) |
|---|
| 1725 | 1720 | FUNC_SAVE |
|---|
| 1726 | 1721 | GCM_ENC_DEC enc |
|---|
| 1727 | 1722 | FUNC_RESTORE |
|---|
| 1728 | | - ret |
|---|
| 1729 | | -ENDPROC(aesni_gcm_enc_update) |
|---|
| 1723 | + RET |
|---|
| 1724 | +SYM_FUNC_END(aesni_gcm_enc_update) |
|---|
| 1730 | 1725 | |
|---|
| 1731 | 1726 | /***************************************************************************** |
|---|
| 1732 | 1727 | * void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. |
|---|
| .. | .. |
|---|
| 1736 | 1731 | * const u8 *in, // Ciphertext input |
|---|
| 1737 | 1732 | * u64 plaintext_len, // Length of data in bytes for decryption. |
|---|
| 1738 | 1733 | */ |
|---|
| 1739 | | -ENTRY(aesni_gcm_dec_update) |
|---|
| 1734 | +SYM_FUNC_START(aesni_gcm_dec_update) |
|---|
| 1740 | 1735 | FUNC_SAVE |
|---|
| 1741 | 1736 | GCM_ENC_DEC dec |
|---|
| 1742 | 1737 | FUNC_RESTORE |
|---|
| 1743 | | - ret |
|---|
| 1744 | | -ENDPROC(aesni_gcm_dec_update) |
|---|
| 1738 | + RET |
|---|
| 1739 | +SYM_FUNC_END(aesni_gcm_dec_update) |
|---|
| 1745 | 1740 | |
|---|
| 1746 | 1741 | /***************************************************************************** |
|---|
| 1747 | 1742 | * void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. |
|---|
| .. | .. |
|---|
| 1751 | 1746 | * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), |
|---|
| 1752 | 1747 | * // 12 or 8. |
|---|
| 1753 | 1748 | */ |
|---|
| 1754 | | -ENTRY(aesni_gcm_finalize) |
|---|
| 1749 | +SYM_FUNC_START(aesni_gcm_finalize) |
|---|
| 1755 | 1750 | FUNC_SAVE |
|---|
| 1756 | 1751 | GCM_COMPLETE %arg3 %arg4 |
|---|
| 1757 | 1752 | FUNC_RESTORE |
|---|
| 1758 | | - ret |
|---|
| 1759 | | -ENDPROC(aesni_gcm_finalize) |
|---|
| 1753 | + RET |
|---|
| 1754 | +SYM_FUNC_END(aesni_gcm_finalize) |
|---|
| 1760 | 1755 | |
|---|
| 1761 | 1756 | #endif |
|---|
| 1762 | 1757 | |
|---|
| 1763 | 1758 | |
|---|
| 1764 | | -.align 4 |
|---|
| 1765 | | -_key_expansion_128: |
|---|
| 1766 | | -_key_expansion_256a: |
|---|
| 1759 | +SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) |
|---|
| 1760 | +SYM_FUNC_START_LOCAL(_key_expansion_256a) |
|---|
| 1767 | 1761 | pshufd $0b11111111, %xmm1, %xmm1 |
|---|
| 1768 | 1762 | shufps $0b00010000, %xmm0, %xmm4 |
|---|
| 1769 | 1763 | pxor %xmm4, %xmm0 |
|---|
| .. | .. |
|---|
| 1772 | 1766 | pxor %xmm1, %xmm0 |
|---|
| 1773 | 1767 | movaps %xmm0, (TKEYP) |
|---|
| 1774 | 1768 | add $0x10, TKEYP |
|---|
| 1775 | | - ret |
|---|
| 1776 | | -ENDPROC(_key_expansion_128) |
|---|
| 1777 | | -ENDPROC(_key_expansion_256a) |
|---|
| 1769 | + RET |
|---|
| 1770 | +SYM_FUNC_END(_key_expansion_256a) |
|---|
| 1771 | +SYM_FUNC_END_ALIAS(_key_expansion_128) |
|---|
| 1778 | 1772 | |
|---|
| 1779 | | -.align 4 |
|---|
| 1780 | | -_key_expansion_192a: |
|---|
| 1773 | +SYM_FUNC_START_LOCAL(_key_expansion_192a) |
|---|
| 1781 | 1774 | pshufd $0b01010101, %xmm1, %xmm1 |
|---|
| 1782 | 1775 | shufps $0b00010000, %xmm0, %xmm4 |
|---|
| 1783 | 1776 | pxor %xmm4, %xmm0 |
|---|
| .. | .. |
|---|
| 1798 | 1791 | shufps $0b01001110, %xmm2, %xmm1 |
|---|
| 1799 | 1792 | movaps %xmm1, 0x10(TKEYP) |
|---|
| 1800 | 1793 | add $0x20, TKEYP |
|---|
| 1801 | | - ret |
|---|
| 1802 | | -ENDPROC(_key_expansion_192a) |
|---|
| 1794 | + RET |
|---|
| 1795 | +SYM_FUNC_END(_key_expansion_192a) |
|---|
| 1803 | 1796 | |
|---|
| 1804 | | -.align 4 |
|---|
| 1805 | | -_key_expansion_192b: |
|---|
| 1797 | +SYM_FUNC_START_LOCAL(_key_expansion_192b) |
|---|
| 1806 | 1798 | pshufd $0b01010101, %xmm1, %xmm1 |
|---|
| 1807 | 1799 | shufps $0b00010000, %xmm0, %xmm4 |
|---|
| 1808 | 1800 | pxor %xmm4, %xmm0 |
|---|
| .. | .. |
|---|
| 1818 | 1810 | |
|---|
| 1819 | 1811 | movaps %xmm0, (TKEYP) |
|---|
| 1820 | 1812 | add $0x10, TKEYP |
|---|
| 1821 | | - ret |
|---|
| 1822 | | -ENDPROC(_key_expansion_192b) |
|---|
| 1813 | + RET |
|---|
| 1814 | +SYM_FUNC_END(_key_expansion_192b) |
|---|
| 1823 | 1815 | |
|---|
| 1824 | | -.align 4 |
|---|
| 1825 | | -_key_expansion_256b: |
|---|
| 1816 | +SYM_FUNC_START_LOCAL(_key_expansion_256b) |
|---|
| 1826 | 1817 | pshufd $0b10101010, %xmm1, %xmm1 |
|---|
| 1827 | 1818 | shufps $0b00010000, %xmm2, %xmm4 |
|---|
| 1828 | 1819 | pxor %xmm4, %xmm2 |
|---|
| .. | .. |
|---|
| 1831 | 1822 | pxor %xmm1, %xmm2 |
|---|
| 1832 | 1823 | movaps %xmm2, (TKEYP) |
|---|
| 1833 | 1824 | add $0x10, TKEYP |
|---|
| 1834 | | - ret |
|---|
| 1835 | | -ENDPROC(_key_expansion_256b) |
|---|
| 1825 | + RET |
|---|
| 1826 | +SYM_FUNC_END(_key_expansion_256b) |
|---|
| 1836 | 1827 | |
|---|
| 1837 | 1828 | /* |
|---|
| 1838 | 1829 | * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, |
|---|
| 1839 | 1830 | * unsigned int key_len) |
|---|
| 1840 | 1831 | */ |
|---|
| 1841 | | -ENTRY(aesni_set_key) |
|---|
| 1832 | +SYM_FUNC_START(aesni_set_key) |
|---|
| 1842 | 1833 | FRAME_BEGIN |
|---|
| 1843 | 1834 | #ifndef __x86_64__ |
|---|
| 1844 | 1835 | pushl KEYP |
|---|
| .. | .. |
|---|
| 1857 | 1848 | movups 0x10(UKEYP), %xmm2 # other user key |
|---|
| 1858 | 1849 | movaps %xmm2, (TKEYP) |
|---|
| 1859 | 1850 | add $0x10, TKEYP |
|---|
| 1860 | | - AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
|---|
| 1851 | + aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 |
|---|
| 1861 | 1852 | call _key_expansion_256a |
|---|
| 1862 | | - AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
|---|
| 1853 | + aeskeygenassist $0x1, %xmm0, %xmm1 |
|---|
| 1863 | 1854 | call _key_expansion_256b |
|---|
| 1864 | | - AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
|---|
| 1855 | + aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 |
|---|
| 1865 | 1856 | call _key_expansion_256a |
|---|
| 1866 | | - AESKEYGENASSIST 0x2 %xmm0 %xmm1 |
|---|
| 1857 | + aeskeygenassist $0x2, %xmm0, %xmm1 |
|---|
| 1867 | 1858 | call _key_expansion_256b |
|---|
| 1868 | | - AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
|---|
| 1859 | + aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 |
|---|
| 1869 | 1860 | call _key_expansion_256a |
|---|
| 1870 | | - AESKEYGENASSIST 0x4 %xmm0 %xmm1 |
|---|
| 1861 | + aeskeygenassist $0x4, %xmm0, %xmm1 |
|---|
| 1871 | 1862 | call _key_expansion_256b |
|---|
| 1872 | | - AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
|---|
| 1863 | + aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 |
|---|
| 1873 | 1864 | call _key_expansion_256a |
|---|
| 1874 | | - AESKEYGENASSIST 0x8 %xmm0 %xmm1 |
|---|
| 1865 | + aeskeygenassist $0x8, %xmm0, %xmm1 |
|---|
| 1875 | 1866 | call _key_expansion_256b |
|---|
| 1876 | | - AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
|---|
| 1867 | + aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 |
|---|
| 1877 | 1868 | call _key_expansion_256a |
|---|
| 1878 | | - AESKEYGENASSIST 0x10 %xmm0 %xmm1 |
|---|
| 1869 | + aeskeygenassist $0x10, %xmm0, %xmm1 |
|---|
| 1879 | 1870 | call _key_expansion_256b |
|---|
| 1880 | | - AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
|---|
| 1871 | + aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 |
|---|
| 1881 | 1872 | call _key_expansion_256a |
|---|
| 1882 | | - AESKEYGENASSIST 0x20 %xmm0 %xmm1 |
|---|
| 1873 | + aeskeygenassist $0x20, %xmm0, %xmm1 |
|---|
| 1883 | 1874 | call _key_expansion_256b |
|---|
| 1884 | | - AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
|---|
| 1875 | + aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 |
|---|
| 1885 | 1876 | call _key_expansion_256a |
|---|
| 1886 | 1877 | jmp .Ldec_key |
|---|
| 1887 | 1878 | .Lenc_key192: |
|---|
| 1888 | 1879 | movq 0x10(UKEYP), %xmm2 # other user key |
|---|
| 1889 | | - AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
|---|
| 1880 | + aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 |
|---|
| 1890 | 1881 | call _key_expansion_192a |
|---|
| 1891 | | - AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
|---|
| 1882 | + aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 |
|---|
| 1892 | 1883 | call _key_expansion_192b |
|---|
| 1893 | | - AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
|---|
| 1884 | + aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 |
|---|
| 1894 | 1885 | call _key_expansion_192a |
|---|
| 1895 | | - AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
|---|
| 1886 | + aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 |
|---|
| 1896 | 1887 | call _key_expansion_192b |
|---|
| 1897 | | - AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
|---|
| 1888 | + aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 |
|---|
| 1898 | 1889 | call _key_expansion_192a |
|---|
| 1899 | | - AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
|---|
| 1890 | + aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 |
|---|
| 1900 | 1891 | call _key_expansion_192b |
|---|
| 1901 | | - AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
|---|
| 1892 | + aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 |
|---|
| 1902 | 1893 | call _key_expansion_192a |
|---|
| 1903 | | - AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 |
|---|
| 1894 | + aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 |
|---|
| 1904 | 1895 | call _key_expansion_192b |
|---|
| 1905 | 1896 | jmp .Ldec_key |
|---|
| 1906 | 1897 | .Lenc_key128: |
|---|
| 1907 | | - AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 |
|---|
| 1898 | + aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 |
|---|
| 1908 | 1899 | call _key_expansion_128 |
|---|
| 1909 | | - AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 |
|---|
| 1900 | + aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 |
|---|
| 1910 | 1901 | call _key_expansion_128 |
|---|
| 1911 | | - AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 |
|---|
| 1902 | + aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 |
|---|
| 1912 | 1903 | call _key_expansion_128 |
|---|
| 1913 | | - AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 |
|---|
| 1904 | + aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 |
|---|
| 1914 | 1905 | call _key_expansion_128 |
|---|
| 1915 | | - AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 |
|---|
| 1906 | + aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 |
|---|
| 1916 | 1907 | call _key_expansion_128 |
|---|
| 1917 | | - AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 |
|---|
| 1908 | + aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 |
|---|
| 1918 | 1909 | call _key_expansion_128 |
|---|
| 1919 | | - AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 |
|---|
| 1910 | + aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 |
|---|
| 1920 | 1911 | call _key_expansion_128 |
|---|
| 1921 | | - AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 |
|---|
| 1912 | + aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 |
|---|
| 1922 | 1913 | call _key_expansion_128 |
|---|
| 1923 | | - AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 |
|---|
| 1914 | + aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 |
|---|
| 1924 | 1915 | call _key_expansion_128 |
|---|
| 1925 | | - AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
|---|
| 1916 | + aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 |
|---|
| 1926 | 1917 | call _key_expansion_128 |
|---|
| 1927 | 1918 | .Ldec_key: |
|---|
| 1928 | 1919 | sub $0x10, TKEYP |
|---|
| .. | .. |
|---|
| 1935 | 1926 | .align 4 |
|---|
| 1936 | 1927 | .Ldec_key_loop: |
|---|
| 1937 | 1928 | movaps (KEYP), %xmm0 |
|---|
| 1938 | | - AESIMC %xmm0 %xmm1 |
|---|
| 1929 | + aesimc %xmm0, %xmm1 |
|---|
| 1939 | 1930 | movaps %xmm1, (UKEYP) |
|---|
| 1940 | 1931 | add $0x10, KEYP |
|---|
| 1941 | 1932 | sub $0x10, UKEYP |
|---|
| .. | .. |
|---|
| 1946 | 1937 | popl KEYP |
|---|
| 1947 | 1938 | #endif |
|---|
| 1948 | 1939 | FRAME_END |
|---|
| 1949 | | - ret |
|---|
| 1950 | | -ENDPROC(aesni_set_key) |
|---|
| 1940 | + RET |
|---|
| 1941 | +SYM_FUNC_END(aesni_set_key) |
|---|
| 1951 | 1942 | |
|---|
| 1952 | 1943 | /* |
|---|
| 1953 | | - * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
|---|
| 1944 | + * void aesni_enc(const void *ctx, u8 *dst, const u8 *src) |
|---|
| 1954 | 1945 | */ |
|---|
| 1955 | | -ENTRY(aesni_enc) |
|---|
| 1946 | +SYM_FUNC_START(aesni_enc) |
|---|
| 1956 | 1947 | FRAME_BEGIN |
|---|
| 1957 | 1948 | #ifndef __x86_64__ |
|---|
| 1958 | 1949 | pushl KEYP |
|---|
| .. | .. |
|---|
| 1970 | 1961 | popl KEYP |
|---|
| 1971 | 1962 | #endif |
|---|
| 1972 | 1963 | FRAME_END |
|---|
| 1973 | | - ret |
|---|
| 1974 | | -ENDPROC(aesni_enc) |
|---|
| 1964 | + RET |
|---|
| 1965 | +SYM_FUNC_END(aesni_enc) |
|---|
| 1975 | 1966 | |
|---|
| 1976 | 1967 | /* |
|---|
| 1977 | 1968 | * _aesni_enc1: internal ABI |
|---|
| .. | .. |
|---|
| 1985 | 1976 | * KEY |
|---|
| 1986 | 1977 | * TKEYP (T1) |
|---|
| 1987 | 1978 | */ |
|---|
| 1988 | | -.align 4 |
|---|
| 1989 | | -_aesni_enc1: |
|---|
| 1979 | +SYM_FUNC_START_LOCAL(_aesni_enc1) |
|---|
| 1990 | 1980 | movaps (KEYP), KEY # key |
|---|
| 1991 | 1981 | mov KEYP, TKEYP |
|---|
| 1992 | 1982 | pxor KEY, STATE # round 0 |
|---|
| .. | .. |
|---|
| 1997 | 1987 | je .Lenc192 |
|---|
| 1998 | 1988 | add $0x20, TKEYP |
|---|
| 1999 | 1989 | movaps -0x60(TKEYP), KEY |
|---|
| 2000 | | - AESENC KEY STATE |
|---|
| 1990 | + aesenc KEY, STATE |
|---|
| 2001 | 1991 | movaps -0x50(TKEYP), KEY |
|---|
| 2002 | | - AESENC KEY STATE |
|---|
| 1992 | + aesenc KEY, STATE |
|---|
| 2003 | 1993 | .align 4 |
|---|
| 2004 | 1994 | .Lenc192: |
|---|
| 2005 | 1995 | movaps -0x40(TKEYP), KEY |
|---|
| 2006 | | - AESENC KEY STATE |
|---|
| 1996 | + aesenc KEY, STATE |
|---|
| 2007 | 1997 | movaps -0x30(TKEYP), KEY |
|---|
| 2008 | | - AESENC KEY STATE |
|---|
| 1998 | + aesenc KEY, STATE |
|---|
| 2009 | 1999 | .align 4 |
|---|
| 2010 | 2000 | .Lenc128: |
|---|
| 2011 | 2001 | movaps -0x20(TKEYP), KEY |
|---|
| 2012 | | - AESENC KEY STATE |
|---|
| 2002 | + aesenc KEY, STATE |
|---|
| 2013 | 2003 | movaps -0x10(TKEYP), KEY |
|---|
| 2014 | | - AESENC KEY STATE |
|---|
| 2004 | + aesenc KEY, STATE |
|---|
| 2015 | 2005 | movaps (TKEYP), KEY |
|---|
| 2016 | | - AESENC KEY STATE |
|---|
| 2006 | + aesenc KEY, STATE |
|---|
| 2017 | 2007 | movaps 0x10(TKEYP), KEY |
|---|
| 2018 | | - AESENC KEY STATE |
|---|
| 2008 | + aesenc KEY, STATE |
|---|
| 2019 | 2009 | movaps 0x20(TKEYP), KEY |
|---|
| 2020 | | - AESENC KEY STATE |
|---|
| 2010 | + aesenc KEY, STATE |
|---|
| 2021 | 2011 | movaps 0x30(TKEYP), KEY |
|---|
| 2022 | | - AESENC KEY STATE |
|---|
| 2012 | + aesenc KEY, STATE |
|---|
| 2023 | 2013 | movaps 0x40(TKEYP), KEY |
|---|
| 2024 | | - AESENC KEY STATE |
|---|
| 2014 | + aesenc KEY, STATE |
|---|
| 2025 | 2015 | movaps 0x50(TKEYP), KEY |
|---|
| 2026 | | - AESENC KEY STATE |
|---|
| 2016 | + aesenc KEY, STATE |
|---|
| 2027 | 2017 | movaps 0x60(TKEYP), KEY |
|---|
| 2028 | | - AESENC KEY STATE |
|---|
| 2018 | + aesenc KEY, STATE |
|---|
| 2029 | 2019 | movaps 0x70(TKEYP), KEY |
|---|
| 2030 | | - AESENCLAST KEY STATE |
|---|
| 2031 | | - ret |
|---|
| 2032 | | -ENDPROC(_aesni_enc1) |
|---|
| 2020 | + aesenclast KEY, STATE |
|---|
| 2021 | + RET |
|---|
| 2022 | +SYM_FUNC_END(_aesni_enc1) |
|---|
| 2033 | 2023 | |
|---|
| 2034 | 2024 | /* |
|---|
| 2035 | 2025 | * _aesni_enc4: internal ABI |
|---|
| .. | .. |
|---|
| 2049 | 2039 | * KEY |
|---|
| 2050 | 2040 | * TKEYP (T1) |
|---|
| 2051 | 2041 | */ |
|---|
| 2052 | | -.align 4 |
|---|
| 2053 | | -_aesni_enc4: |
|---|
| 2042 | +SYM_FUNC_START_LOCAL(_aesni_enc4) |
|---|
| 2054 | 2043 | movaps (KEYP), KEY # key |
|---|
| 2055 | 2044 | mov KEYP, TKEYP |
|---|
| 2056 | 2045 | pxor KEY, STATE1 # round 0 |
|---|
| .. | .. |
|---|
| 2064 | 2053 | je .L4enc192 |
|---|
| 2065 | 2054 | add $0x20, TKEYP |
|---|
| 2066 | 2055 | movaps -0x60(TKEYP), KEY |
|---|
| 2067 | | - AESENC KEY STATE1 |
|---|
| 2068 | | - AESENC KEY STATE2 |
|---|
| 2069 | | - AESENC KEY STATE3 |
|---|
| 2070 | | - AESENC KEY STATE4 |
|---|
| 2056 | + aesenc KEY, STATE1 |
|---|
| 2057 | + aesenc KEY, STATE2 |
|---|
| 2058 | + aesenc KEY, STATE3 |
|---|
| 2059 | + aesenc KEY, STATE4 |
|---|
| 2071 | 2060 | movaps -0x50(TKEYP), KEY |
|---|
| 2072 | | - AESENC KEY STATE1 |
|---|
| 2073 | | - AESENC KEY STATE2 |
|---|
| 2074 | | - AESENC KEY STATE3 |
|---|
| 2075 | | - AESENC KEY STATE4 |
|---|
| 2061 | + aesenc KEY, STATE1 |
|---|
| 2062 | + aesenc KEY, STATE2 |
|---|
| 2063 | + aesenc KEY, STATE3 |
|---|
| 2064 | + aesenc KEY, STATE4 |
|---|
| 2076 | 2065 | #.align 4 |
|---|
| 2077 | 2066 | .L4enc192: |
|---|
| 2078 | 2067 | movaps -0x40(TKEYP), KEY |
|---|
| 2079 | | - AESENC KEY STATE1 |
|---|
| 2080 | | - AESENC KEY STATE2 |
|---|
| 2081 | | - AESENC KEY STATE3 |
|---|
| 2082 | | - AESENC KEY STATE4 |
|---|
| 2068 | + aesenc KEY, STATE1 |
|---|
| 2069 | + aesenc KEY, STATE2 |
|---|
| 2070 | + aesenc KEY, STATE3 |
|---|
| 2071 | + aesenc KEY, STATE4 |
|---|
| 2083 | 2072 | movaps -0x30(TKEYP), KEY |
|---|
| 2084 | | - AESENC KEY STATE1 |
|---|
| 2085 | | - AESENC KEY STATE2 |
|---|
| 2086 | | - AESENC KEY STATE3 |
|---|
| 2087 | | - AESENC KEY STATE4 |
|---|
| 2073 | + aesenc KEY, STATE1 |
|---|
| 2074 | + aesenc KEY, STATE2 |
|---|
| 2075 | + aesenc KEY, STATE3 |
|---|
| 2076 | + aesenc KEY, STATE4 |
|---|
| 2088 | 2077 | #.align 4 |
|---|
| 2089 | 2078 | .L4enc128: |
|---|
| 2090 | 2079 | movaps -0x20(TKEYP), KEY |
|---|
| 2091 | | - AESENC KEY STATE1 |
|---|
| 2092 | | - AESENC KEY STATE2 |
|---|
| 2093 | | - AESENC KEY STATE3 |
|---|
| 2094 | | - AESENC KEY STATE4 |
|---|
| 2080 | + aesenc KEY, STATE1 |
|---|
| 2081 | + aesenc KEY, STATE2 |
|---|
| 2082 | + aesenc KEY, STATE3 |
|---|
| 2083 | + aesenc KEY, STATE4 |
|---|
| 2095 | 2084 | movaps -0x10(TKEYP), KEY |
|---|
| 2096 | | - AESENC KEY STATE1 |
|---|
| 2097 | | - AESENC KEY STATE2 |
|---|
| 2098 | | - AESENC KEY STATE3 |
|---|
| 2099 | | - AESENC KEY STATE4 |
|---|
| 2085 | + aesenc KEY, STATE1 |
|---|
| 2086 | + aesenc KEY, STATE2 |
|---|
| 2087 | + aesenc KEY, STATE3 |
|---|
| 2088 | + aesenc KEY, STATE4 |
|---|
| 2100 | 2089 | movaps (TKEYP), KEY |
|---|
| 2101 | | - AESENC KEY STATE1 |
|---|
| 2102 | | - AESENC KEY STATE2 |
|---|
| 2103 | | - AESENC KEY STATE3 |
|---|
| 2104 | | - AESENC KEY STATE4 |
|---|
| 2090 | + aesenc KEY, STATE1 |
|---|
| 2091 | + aesenc KEY, STATE2 |
|---|
| 2092 | + aesenc KEY, STATE3 |
|---|
| 2093 | + aesenc KEY, STATE4 |
|---|
| 2105 | 2094 | movaps 0x10(TKEYP), KEY |
|---|
| 2106 | | - AESENC KEY STATE1 |
|---|
| 2107 | | - AESENC KEY STATE2 |
|---|
| 2108 | | - AESENC KEY STATE3 |
|---|
| 2109 | | - AESENC KEY STATE4 |
|---|
| 2095 | + aesenc KEY, STATE1 |
|---|
| 2096 | + aesenc KEY, STATE2 |
|---|
| 2097 | + aesenc KEY, STATE3 |
|---|
| 2098 | + aesenc KEY, STATE4 |
|---|
| 2110 | 2099 | movaps 0x20(TKEYP), KEY |
|---|
| 2111 | | - AESENC KEY STATE1 |
|---|
| 2112 | | - AESENC KEY STATE2 |
|---|
| 2113 | | - AESENC KEY STATE3 |
|---|
| 2114 | | - AESENC KEY STATE4 |
|---|
| 2100 | + aesenc KEY, STATE1 |
|---|
| 2101 | + aesenc KEY, STATE2 |
|---|
| 2102 | + aesenc KEY, STATE3 |
|---|
| 2103 | + aesenc KEY, STATE4 |
|---|
| 2115 | 2104 | movaps 0x30(TKEYP), KEY |
|---|
| 2116 | | - AESENC KEY STATE1 |
|---|
| 2117 | | - AESENC KEY STATE2 |
|---|
| 2118 | | - AESENC KEY STATE3 |
|---|
| 2119 | | - AESENC KEY STATE4 |
|---|
| 2105 | + aesenc KEY, STATE1 |
|---|
| 2106 | + aesenc KEY, STATE2 |
|---|
| 2107 | + aesenc KEY, STATE3 |
|---|
| 2108 | + aesenc KEY, STATE4 |
|---|
| 2120 | 2109 | movaps 0x40(TKEYP), KEY |
|---|
| 2121 | | - AESENC KEY STATE1 |
|---|
| 2122 | | - AESENC KEY STATE2 |
|---|
| 2123 | | - AESENC KEY STATE3 |
|---|
| 2124 | | - AESENC KEY STATE4 |
|---|
| 2110 | + aesenc KEY, STATE1 |
|---|
| 2111 | + aesenc KEY, STATE2 |
|---|
| 2112 | + aesenc KEY, STATE3 |
|---|
| 2113 | + aesenc KEY, STATE4 |
|---|
| 2125 | 2114 | movaps 0x50(TKEYP), KEY |
|---|
| 2126 | | - AESENC KEY STATE1 |
|---|
| 2127 | | - AESENC KEY STATE2 |
|---|
| 2128 | | - AESENC KEY STATE3 |
|---|
| 2129 | | - AESENC KEY STATE4 |
|---|
| 2115 | + aesenc KEY, STATE1 |
|---|
| 2116 | + aesenc KEY, STATE2 |
|---|
| 2117 | + aesenc KEY, STATE3 |
|---|
| 2118 | + aesenc KEY, STATE4 |
|---|
| 2130 | 2119 | movaps 0x60(TKEYP), KEY |
|---|
| 2131 | | - AESENC KEY STATE1 |
|---|
| 2132 | | - AESENC KEY STATE2 |
|---|
| 2133 | | - AESENC KEY STATE3 |
|---|
| 2134 | | - AESENC KEY STATE4 |
|---|
| 2120 | + aesenc KEY, STATE1 |
|---|
| 2121 | + aesenc KEY, STATE2 |
|---|
| 2122 | + aesenc KEY, STATE3 |
|---|
| 2123 | + aesenc KEY, STATE4 |
|---|
| 2135 | 2124 | movaps 0x70(TKEYP), KEY |
|---|
| 2136 | | - AESENCLAST KEY STATE1 # last round |
|---|
| 2137 | | - AESENCLAST KEY STATE2 |
|---|
| 2138 | | - AESENCLAST KEY STATE3 |
|---|
| 2139 | | - AESENCLAST KEY STATE4 |
|---|
| 2140 | | - ret |
|---|
| 2141 | | -ENDPROC(_aesni_enc4) |
|---|
| 2125 | + aesenclast KEY, STATE1 # last round |
|---|
| 2126 | + aesenclast KEY, STATE2 |
|---|
| 2127 | + aesenclast KEY, STATE3 |
|---|
| 2128 | + aesenclast KEY, STATE4 |
|---|
| 2129 | + RET |
|---|
| 2130 | +SYM_FUNC_END(_aesni_enc4) |
|---|
| 2142 | 2131 | |
|---|
| 2143 | 2132 | /* |
|---|
| 2144 | | - * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) |
|---|
| 2133 | + * void aesni_dec (const void *ctx, u8 *dst, const u8 *src) |
|---|
| 2145 | 2134 | */ |
|---|
| 2146 | | -ENTRY(aesni_dec) |
|---|
| 2135 | +SYM_FUNC_START(aesni_dec) |
|---|
| 2147 | 2136 | FRAME_BEGIN |
|---|
| 2148 | 2137 | #ifndef __x86_64__ |
|---|
| 2149 | 2138 | pushl KEYP |
|---|
| .. | .. |
|---|
| 2162 | 2151 | popl KEYP |
|---|
| 2163 | 2152 | #endif |
|---|
| 2164 | 2153 | FRAME_END |
|---|
| 2165 | | - ret |
|---|
| 2166 | | -ENDPROC(aesni_dec) |
|---|
| 2154 | + RET |
|---|
| 2155 | +SYM_FUNC_END(aesni_dec) |
|---|
| 2167 | 2156 | |
|---|
| 2168 | 2157 | /* |
|---|
| 2169 | 2158 | * _aesni_dec1: internal ABI |
|---|
| .. | .. |
|---|
| 2177 | 2166 | * KEY |
|---|
| 2178 | 2167 | * TKEYP (T1) |
|---|
| 2179 | 2168 | */ |
|---|
| 2180 | | -.align 4 |
|---|
| 2181 | | -_aesni_dec1: |
|---|
| 2169 | +SYM_FUNC_START_LOCAL(_aesni_dec1) |
|---|
| 2182 | 2170 | movaps (KEYP), KEY # key |
|---|
| 2183 | 2171 | mov KEYP, TKEYP |
|---|
| 2184 | 2172 | pxor KEY, STATE # round 0 |
|---|
| .. | .. |
|---|
| 2189 | 2177 | je .Ldec192 |
|---|
| 2190 | 2178 | add $0x20, TKEYP |
|---|
| 2191 | 2179 | movaps -0x60(TKEYP), KEY |
|---|
| 2192 | | - AESDEC KEY STATE |
|---|
| 2180 | + aesdec KEY, STATE |
|---|
| 2193 | 2181 | movaps -0x50(TKEYP), KEY |
|---|
| 2194 | | - AESDEC KEY STATE |
|---|
| 2182 | + aesdec KEY, STATE |
|---|
| 2195 | 2183 | .align 4 |
|---|
| 2196 | 2184 | .Ldec192: |
|---|
| 2197 | 2185 | movaps -0x40(TKEYP), KEY |
|---|
| 2198 | | - AESDEC KEY STATE |
|---|
| 2186 | + aesdec KEY, STATE |
|---|
| 2199 | 2187 | movaps -0x30(TKEYP), KEY |
|---|
| 2200 | | - AESDEC KEY STATE |
|---|
| 2188 | + aesdec KEY, STATE |
|---|
| 2201 | 2189 | .align 4 |
|---|
| 2202 | 2190 | .Ldec128: |
|---|
| 2203 | 2191 | movaps -0x20(TKEYP), KEY |
|---|
| 2204 | | - AESDEC KEY STATE |
|---|
| 2192 | + aesdec KEY, STATE |
|---|
| 2205 | 2193 | movaps -0x10(TKEYP), KEY |
|---|
| 2206 | | - AESDEC KEY STATE |
|---|
| 2194 | + aesdec KEY, STATE |
|---|
| 2207 | 2195 | movaps (TKEYP), KEY |
|---|
| 2208 | | - AESDEC KEY STATE |
|---|
| 2196 | + aesdec KEY, STATE |
|---|
| 2209 | 2197 | movaps 0x10(TKEYP), KEY |
|---|
| 2210 | | - AESDEC KEY STATE |
|---|
| 2198 | + aesdec KEY, STATE |
|---|
| 2211 | 2199 | movaps 0x20(TKEYP), KEY |
|---|
| 2212 | | - AESDEC KEY STATE |
|---|
| 2200 | + aesdec KEY, STATE |
|---|
| 2213 | 2201 | movaps 0x30(TKEYP), KEY |
|---|
| 2214 | | - AESDEC KEY STATE |
|---|
| 2202 | + aesdec KEY, STATE |
|---|
| 2215 | 2203 | movaps 0x40(TKEYP), KEY |
|---|
| 2216 | | - AESDEC KEY STATE |
|---|
| 2204 | + aesdec KEY, STATE |
|---|
| 2217 | 2205 | movaps 0x50(TKEYP), KEY |
|---|
| 2218 | | - AESDEC KEY STATE |
|---|
| 2206 | + aesdec KEY, STATE |
|---|
| 2219 | 2207 | movaps 0x60(TKEYP), KEY |
|---|
| 2220 | | - AESDEC KEY STATE |
|---|
| 2208 | + aesdec KEY, STATE |
|---|
| 2221 | 2209 | movaps 0x70(TKEYP), KEY |
|---|
| 2222 | | - AESDECLAST KEY STATE |
|---|
| 2223 | | - ret |
|---|
| 2224 | | -ENDPROC(_aesni_dec1) |
|---|
| 2210 | + aesdeclast KEY, STATE |
|---|
| 2211 | + RET |
|---|
| 2212 | +SYM_FUNC_END(_aesni_dec1) |
|---|
| 2225 | 2213 | |
|---|
| 2226 | 2214 | /* |
|---|
| 2227 | 2215 | * _aesni_dec4: internal ABI |
|---|
| .. | .. |
|---|
| 2241 | 2229 | * KEY |
|---|
| 2242 | 2230 | * TKEYP (T1) |
|---|
| 2243 | 2231 | */ |
|---|
| 2244 | | -.align 4 |
|---|
| 2245 | | -_aesni_dec4: |
|---|
| 2232 | +SYM_FUNC_START_LOCAL(_aesni_dec4) |
|---|
| 2246 | 2233 | movaps (KEYP), KEY # key |
|---|
| 2247 | 2234 | mov KEYP, TKEYP |
|---|
| 2248 | 2235 | pxor KEY, STATE1 # round 0 |
|---|
| .. | .. |
|---|
| 2256 | 2243 | je .L4dec192 |
|---|
| 2257 | 2244 | add $0x20, TKEYP |
|---|
| 2258 | 2245 | movaps -0x60(TKEYP), KEY |
|---|
| 2259 | | - AESDEC KEY STATE1 |
|---|
| 2260 | | - AESDEC KEY STATE2 |
|---|
| 2261 | | - AESDEC KEY STATE3 |
|---|
| 2262 | | - AESDEC KEY STATE4 |
|---|
| 2246 | + aesdec KEY, STATE1 |
|---|
| 2247 | + aesdec KEY, STATE2 |
|---|
| 2248 | + aesdec KEY, STATE3 |
|---|
| 2249 | + aesdec KEY, STATE4 |
|---|
| 2263 | 2250 | movaps -0x50(TKEYP), KEY |
|---|
| 2264 | | - AESDEC KEY STATE1 |
|---|
| 2265 | | - AESDEC KEY STATE2 |
|---|
| 2266 | | - AESDEC KEY STATE3 |
|---|
| 2267 | | - AESDEC KEY STATE4 |
|---|
| 2251 | + aesdec KEY, STATE1 |
|---|
| 2252 | + aesdec KEY, STATE2 |
|---|
| 2253 | + aesdec KEY, STATE3 |
|---|
| 2254 | + aesdec KEY, STATE4 |
|---|
| 2268 | 2255 | .align 4 |
|---|
| 2269 | 2256 | .L4dec192: |
|---|
| 2270 | 2257 | movaps -0x40(TKEYP), KEY |
|---|
| 2271 | | - AESDEC KEY STATE1 |
|---|
| 2272 | | - AESDEC KEY STATE2 |
|---|
| 2273 | | - AESDEC KEY STATE3 |
|---|
| 2274 | | - AESDEC KEY STATE4 |
|---|
| 2258 | + aesdec KEY, STATE1 |
|---|
| 2259 | + aesdec KEY, STATE2 |
|---|
| 2260 | + aesdec KEY, STATE3 |
|---|
| 2261 | + aesdec KEY, STATE4 |
|---|
| 2275 | 2262 | movaps -0x30(TKEYP), KEY |
|---|
| 2276 | | - AESDEC KEY STATE1 |
|---|
| 2277 | | - AESDEC KEY STATE2 |
|---|
| 2278 | | - AESDEC KEY STATE3 |
|---|
| 2279 | | - AESDEC KEY STATE4 |
|---|
| 2263 | + aesdec KEY, STATE1 |
|---|
| 2264 | + aesdec KEY, STATE2 |
|---|
| 2265 | + aesdec KEY, STATE3 |
|---|
| 2266 | + aesdec KEY, STATE4 |
|---|
| 2280 | 2267 | .align 4 |
|---|
| 2281 | 2268 | .L4dec128: |
|---|
| 2282 | 2269 | movaps -0x20(TKEYP), KEY |
|---|
| 2283 | | - AESDEC KEY STATE1 |
|---|
| 2284 | | - AESDEC KEY STATE2 |
|---|
| 2285 | | - AESDEC KEY STATE3 |
|---|
| 2286 | | - AESDEC KEY STATE4 |
|---|
| 2270 | + aesdec KEY, STATE1 |
|---|
| 2271 | + aesdec KEY, STATE2 |
|---|
| 2272 | + aesdec KEY, STATE3 |
|---|
| 2273 | + aesdec KEY, STATE4 |
|---|
| 2287 | 2274 | movaps -0x10(TKEYP), KEY |
|---|
| 2288 | | - AESDEC KEY STATE1 |
|---|
| 2289 | | - AESDEC KEY STATE2 |
|---|
| 2290 | | - AESDEC KEY STATE3 |
|---|
| 2291 | | - AESDEC KEY STATE4 |
|---|
| 2275 | + aesdec KEY, STATE1 |
|---|
| 2276 | + aesdec KEY, STATE2 |
|---|
| 2277 | + aesdec KEY, STATE3 |
|---|
| 2278 | + aesdec KEY, STATE4 |
|---|
| 2292 | 2279 | movaps (TKEYP), KEY |
|---|
| 2293 | | - AESDEC KEY STATE1 |
|---|
| 2294 | | - AESDEC KEY STATE2 |
|---|
| 2295 | | - AESDEC KEY STATE3 |
|---|
| 2296 | | - AESDEC KEY STATE4 |
|---|
| 2280 | + aesdec KEY, STATE1 |
|---|
| 2281 | + aesdec KEY, STATE2 |
|---|
| 2282 | + aesdec KEY, STATE3 |
|---|
| 2283 | + aesdec KEY, STATE4 |
|---|
| 2297 | 2284 | movaps 0x10(TKEYP), KEY |
|---|
| 2298 | | - AESDEC KEY STATE1 |
|---|
| 2299 | | - AESDEC KEY STATE2 |
|---|
| 2300 | | - AESDEC KEY STATE3 |
|---|
| 2301 | | - AESDEC KEY STATE4 |
|---|
| 2285 | + aesdec KEY, STATE1 |
|---|
| 2286 | + aesdec KEY, STATE2 |
|---|
| 2287 | + aesdec KEY, STATE3 |
|---|
| 2288 | + aesdec KEY, STATE4 |
|---|
| 2302 | 2289 | movaps 0x20(TKEYP), KEY |
|---|
| 2303 | | - AESDEC KEY STATE1 |
|---|
| 2304 | | - AESDEC KEY STATE2 |
|---|
| 2305 | | - AESDEC KEY STATE3 |
|---|
| 2306 | | - AESDEC KEY STATE4 |
|---|
| 2290 | + aesdec KEY, STATE1 |
|---|
| 2291 | + aesdec KEY, STATE2 |
|---|
| 2292 | + aesdec KEY, STATE3 |
|---|
| 2293 | + aesdec KEY, STATE4 |
|---|
| 2307 | 2294 | movaps 0x30(TKEYP), KEY |
|---|
| 2308 | | - AESDEC KEY STATE1 |
|---|
| 2309 | | - AESDEC KEY STATE2 |
|---|
| 2310 | | - AESDEC KEY STATE3 |
|---|
| 2311 | | - AESDEC KEY STATE4 |
|---|
| 2295 | + aesdec KEY, STATE1 |
|---|
| 2296 | + aesdec KEY, STATE2 |
|---|
| 2297 | + aesdec KEY, STATE3 |
|---|
| 2298 | + aesdec KEY, STATE4 |
|---|
| 2312 | 2299 | movaps 0x40(TKEYP), KEY |
|---|
| 2313 | | - AESDEC KEY STATE1 |
|---|
| 2314 | | - AESDEC KEY STATE2 |
|---|
| 2315 | | - AESDEC KEY STATE3 |
|---|
| 2316 | | - AESDEC KEY STATE4 |
|---|
| 2300 | + aesdec KEY, STATE1 |
|---|
| 2301 | + aesdec KEY, STATE2 |
|---|
| 2302 | + aesdec KEY, STATE3 |
|---|
| 2303 | + aesdec KEY, STATE4 |
|---|
| 2317 | 2304 | movaps 0x50(TKEYP), KEY |
|---|
| 2318 | | - AESDEC KEY STATE1 |
|---|
| 2319 | | - AESDEC KEY STATE2 |
|---|
| 2320 | | - AESDEC KEY STATE3 |
|---|
| 2321 | | - AESDEC KEY STATE4 |
|---|
| 2305 | + aesdec KEY, STATE1 |
|---|
| 2306 | + aesdec KEY, STATE2 |
|---|
| 2307 | + aesdec KEY, STATE3 |
|---|
| 2308 | + aesdec KEY, STATE4 |
|---|
| 2322 | 2309 | movaps 0x60(TKEYP), KEY |
|---|
| 2323 | | - AESDEC KEY STATE1 |
|---|
| 2324 | | - AESDEC KEY STATE2 |
|---|
| 2325 | | - AESDEC KEY STATE3 |
|---|
| 2326 | | - AESDEC KEY STATE4 |
|---|
| 2310 | + aesdec KEY, STATE1 |
|---|
| 2311 | + aesdec KEY, STATE2 |
|---|
| 2312 | + aesdec KEY, STATE3 |
|---|
| 2313 | + aesdec KEY, STATE4 |
|---|
| 2327 | 2314 | movaps 0x70(TKEYP), KEY |
|---|
| 2328 | | - AESDECLAST KEY STATE1 # last round |
|---|
| 2329 | | - AESDECLAST KEY STATE2 |
|---|
| 2330 | | - AESDECLAST KEY STATE3 |
|---|
| 2331 | | - AESDECLAST KEY STATE4 |
|---|
| 2332 | | - ret |
|---|
| 2333 | | -ENDPROC(_aesni_dec4) |
|---|
| 2315 | + aesdeclast KEY, STATE1 # last round |
|---|
| 2316 | + aesdeclast KEY, STATE2 |
|---|
| 2317 | + aesdeclast KEY, STATE3 |
|---|
| 2318 | + aesdeclast KEY, STATE4 |
|---|
| 2319 | + RET |
|---|
| 2320 | +SYM_FUNC_END(_aesni_dec4) |
|---|
| 2334 | 2321 | |
|---|
| 2335 | 2322 | /* |
|---|
| 2336 | 2323 | * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
|---|
| 2337 | 2324 | * size_t len) |
|---|
| 2338 | 2325 | */ |
|---|
| 2339 | | -ENTRY(aesni_ecb_enc) |
|---|
| 2326 | +SYM_FUNC_START(aesni_ecb_enc) |
|---|
| 2340 | 2327 | FRAME_BEGIN |
|---|
| 2341 | 2328 | #ifndef __x86_64__ |
|---|
| 2342 | 2329 | pushl LEN |
|---|
| .. | .. |
|---|
| 2389 | 2376 | popl LEN |
|---|
| 2390 | 2377 | #endif |
|---|
| 2391 | 2378 | FRAME_END |
|---|
| 2392 | | - ret |
|---|
| 2393 | | -ENDPROC(aesni_ecb_enc) |
|---|
| 2379 | + RET |
|---|
| 2380 | +SYM_FUNC_END(aesni_ecb_enc) |
|---|
| 2394 | 2381 | |
|---|
| 2395 | 2382 | /* |
|---|
| 2396 | 2383 | * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
|---|
| 2397 | 2384 | * size_t len); |
|---|
| 2398 | 2385 | */ |
|---|
| 2399 | | -ENTRY(aesni_ecb_dec) |
|---|
| 2386 | +SYM_FUNC_START(aesni_ecb_dec) |
|---|
| 2400 | 2387 | FRAME_BEGIN |
|---|
| 2401 | 2388 | #ifndef __x86_64__ |
|---|
| 2402 | 2389 | pushl LEN |
|---|
| .. | .. |
|---|
| 2450 | 2437 | popl LEN |
|---|
| 2451 | 2438 | #endif |
|---|
| 2452 | 2439 | FRAME_END |
|---|
| 2453 | | - ret |
|---|
| 2454 | | -ENDPROC(aesni_ecb_dec) |
|---|
| 2440 | + RET |
|---|
| 2441 | +SYM_FUNC_END(aesni_ecb_dec) |
|---|
| 2455 | 2442 | |
|---|
| 2456 | 2443 | /* |
|---|
| 2457 | 2444 | * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
|---|
| 2458 | 2445 | * size_t len, u8 *iv) |
|---|
| 2459 | 2446 | */ |
|---|
| 2460 | | -ENTRY(aesni_cbc_enc) |
|---|
| 2447 | +SYM_FUNC_START(aesni_cbc_enc) |
|---|
| 2461 | 2448 | FRAME_BEGIN |
|---|
| 2462 | 2449 | #ifndef __x86_64__ |
|---|
| 2463 | 2450 | pushl IVP |
|---|
| .. | .. |
|---|
| 2494 | 2481 | popl IVP |
|---|
| 2495 | 2482 | #endif |
|---|
| 2496 | 2483 | FRAME_END |
|---|
| 2497 | | - ret |
|---|
| 2498 | | -ENDPROC(aesni_cbc_enc) |
|---|
| 2484 | + RET |
|---|
| 2485 | +SYM_FUNC_END(aesni_cbc_enc) |
|---|
| 2499 | 2486 | |
|---|
| 2500 | 2487 | /* |
|---|
| 2501 | 2488 | * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
|---|
| 2502 | 2489 | * size_t len, u8 *iv) |
|---|
| 2503 | 2490 | */ |
|---|
| 2504 | | -ENTRY(aesni_cbc_dec) |
|---|
| 2491 | +SYM_FUNC_START(aesni_cbc_dec) |
|---|
| 2505 | 2492 | FRAME_BEGIN |
|---|
| 2506 | 2493 | #ifndef __x86_64__ |
|---|
| 2507 | 2494 | pushl IVP |
|---|
| .. | .. |
|---|
| 2587 | 2574 | popl IVP |
|---|
| 2588 | 2575 | #endif |
|---|
| 2589 | 2576 | FRAME_END |
|---|
| 2590 | | - ret |
|---|
| 2591 | | -ENDPROC(aesni_cbc_dec) |
|---|
| 2577 | + RET |
|---|
| 2578 | +SYM_FUNC_END(aesni_cbc_dec) |
|---|
| 2592 | 2579 | |
|---|
| 2593 | 2580 | #ifdef __x86_64__ |
|---|
| 2594 | 2581 | .pushsection .rodata |
|---|
| .. | .. |
|---|
| 2608 | 2595 | * INC: == 1, in little endian |
|---|
| 2609 | 2596 | * BSWAP_MASK == endian swapping mask |
|---|
| 2610 | 2597 | */ |
|---|
| 2611 | | -.align 4 |
|---|
| 2612 | | -_aesni_inc_init: |
|---|
| 2598 | +SYM_FUNC_START_LOCAL(_aesni_inc_init) |
|---|
| 2613 | 2599 | movaps .Lbswap_mask, BSWAP_MASK |
|---|
| 2614 | 2600 | movaps IV, CTR |
|---|
| 2615 | | - PSHUFB_XMM BSWAP_MASK CTR |
|---|
| 2601 | + pshufb BSWAP_MASK, CTR |
|---|
| 2616 | 2602 | mov $1, TCTR_LOW |
|---|
| 2617 | | - MOVQ_R64_XMM TCTR_LOW INC |
|---|
| 2618 | | - MOVQ_R64_XMM CTR TCTR_LOW |
|---|
| 2619 | | - ret |
|---|
| 2620 | | -ENDPROC(_aesni_inc_init) |
|---|
| 2603 | + movq TCTR_LOW, INC |
|---|
| 2604 | + movq CTR, TCTR_LOW |
|---|
| 2605 | + RET |
|---|
| 2606 | +SYM_FUNC_END(_aesni_inc_init) |
|---|
| 2621 | 2607 | |
|---|
| 2622 | 2608 | /* |
|---|
| 2623 | 2609 | * _aesni_inc: internal ABI |
|---|
| .. | .. |
|---|
| 2634 | 2620 | * CTR: == output IV, in little endian |
|---|
| 2635 | 2621 | * TCTR_LOW: == lower qword of CTR |
|---|
| 2636 | 2622 | */ |
|---|
| 2637 | | -.align 4 |
|---|
| 2638 | | -_aesni_inc: |
|---|
| 2623 | +SYM_FUNC_START_LOCAL(_aesni_inc) |
|---|
| 2639 | 2624 | paddq INC, CTR |
|---|
| 2640 | 2625 | add $1, TCTR_LOW |
|---|
| 2641 | 2626 | jnc .Linc_low |
|---|
| .. | .. |
|---|
| 2644 | 2629 | psrldq $8, INC |
|---|
| 2645 | 2630 | .Linc_low: |
|---|
| 2646 | 2631 | movaps CTR, IV |
|---|
| 2647 | | - PSHUFB_XMM BSWAP_MASK IV |
|---|
| 2648 | | - ret |
|---|
| 2649 | | -ENDPROC(_aesni_inc) |
|---|
| 2632 | + pshufb BSWAP_MASK, IV |
|---|
| 2633 | + RET |
|---|
| 2634 | +SYM_FUNC_END(_aesni_inc) |
|---|
| 2650 | 2635 | |
|---|
| 2651 | 2636 | /* |
|---|
| 2652 | 2637 | * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
|---|
| 2653 | 2638 | * size_t len, u8 *iv) |
|---|
| 2654 | 2639 | */ |
|---|
| 2655 | | -ENTRY(aesni_ctr_enc) |
|---|
| 2640 | +SYM_FUNC_START(aesni_ctr_enc) |
|---|
| 2656 | 2641 | FRAME_BEGIN |
|---|
| 2657 | 2642 | cmp $16, LEN |
|---|
| 2658 | 2643 | jb .Lctr_enc_just_ret |
|---|
| .. | .. |
|---|
| 2708 | 2693 | movups IV, (IVP) |
|---|
| 2709 | 2694 | .Lctr_enc_just_ret: |
|---|
| 2710 | 2695 | FRAME_END |
|---|
| 2711 | | - ret |
|---|
| 2712 | | -ENDPROC(aesni_ctr_enc) |
|---|
| 2696 | + RET |
|---|
| 2697 | +SYM_FUNC_END(aesni_ctr_enc) |
|---|
| 2713 | 2698 | |
|---|
| 2714 | 2699 | /* |
|---|
| 2715 | 2700 | * _aesni_gf128mul_x_ble: internal ABI |
|---|
| .. | .. |
|---|
| 2730 | 2715 | pxor CTR, IV; |
|---|
| 2731 | 2716 | |
|---|
| 2732 | 2717 | /* |
|---|
| 2733 | | - * void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, |
|---|
| 2734 | | - * bool enc, u8 *iv) |
|---|
| 2718 | + * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, |
|---|
| 2719 | + * const u8 *src, unsigned int len, le128 *iv) |
|---|
| 2735 | 2720 | */ |
|---|
| 2736 | | -ENTRY(aesni_xts_crypt8) |
|---|
| 2721 | +SYM_FUNC_START(aesni_xts_encrypt) |
|---|
| 2737 | 2722 | FRAME_BEGIN |
|---|
| 2738 | | - cmpb $0, %cl |
|---|
| 2739 | | - movl $0, %ecx |
|---|
| 2740 | | - movl $240, %r10d |
|---|
| 2741 | | - leaq _aesni_enc4, %r11 |
|---|
| 2742 | | - leaq _aesni_dec4, %rax |
|---|
| 2743 | | - cmovel %r10d, %ecx |
|---|
| 2744 | | - cmoveq %rax, %r11 |
|---|
| 2745 | 2723 | |
|---|
| 2746 | 2724 | movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK |
|---|
| 2747 | 2725 | movups (IVP), IV |
|---|
| 2748 | 2726 | |
|---|
| 2749 | 2727 | mov 480(KEYP), KLEN |
|---|
| 2750 | | - addq %rcx, KEYP |
|---|
| 2751 | 2728 | |
|---|
| 2729 | +.Lxts_enc_loop4: |
|---|
| 2752 | 2730 | movdqa IV, STATE1 |
|---|
| 2753 | 2731 | movdqu 0x00(INP), INC |
|---|
| 2754 | 2732 | pxor INC, STATE1 |
|---|
| .. | .. |
|---|
| 2772 | 2750 | pxor INC, STATE4 |
|---|
| 2773 | 2751 | movdqu IV, 0x30(OUTP) |
|---|
| 2774 | 2752 | |
|---|
| 2775 | | - CALL_NOSPEC %r11 |
|---|
| 2753 | + call _aesni_enc4 |
|---|
| 2776 | 2754 | |
|---|
| 2777 | 2755 | movdqu 0x00(OUTP), INC |
|---|
| 2778 | 2756 | pxor INC, STATE1 |
|---|
| 2779 | 2757 | movdqu STATE1, 0x00(OUTP) |
|---|
| 2780 | 2758 | |
|---|
| 2781 | | - _aesni_gf128mul_x_ble() |
|---|
| 2782 | | - movdqa IV, STATE1 |
|---|
| 2783 | | - movdqu 0x40(INP), INC |
|---|
| 2784 | | - pxor INC, STATE1 |
|---|
| 2785 | | - movdqu IV, 0x40(OUTP) |
|---|
| 2786 | | - |
|---|
| 2787 | 2759 | movdqu 0x10(OUTP), INC |
|---|
| 2788 | 2760 | pxor INC, STATE2 |
|---|
| 2789 | 2761 | movdqu STATE2, 0x10(OUTP) |
|---|
| 2790 | 2762 | |
|---|
| 2791 | | - _aesni_gf128mul_x_ble() |
|---|
| 2792 | | - movdqa IV, STATE2 |
|---|
| 2793 | | - movdqu 0x50(INP), INC |
|---|
| 2794 | | - pxor INC, STATE2 |
|---|
| 2795 | | - movdqu IV, 0x50(OUTP) |
|---|
| 2796 | | - |
|---|
| 2797 | 2763 | movdqu 0x20(OUTP), INC |
|---|
| 2798 | 2764 | pxor INC, STATE3 |
|---|
| 2799 | 2765 | movdqu STATE3, 0x20(OUTP) |
|---|
| 2800 | | - |
|---|
| 2801 | | - _aesni_gf128mul_x_ble() |
|---|
| 2802 | | - movdqa IV, STATE3 |
|---|
| 2803 | | - movdqu 0x60(INP), INC |
|---|
| 2804 | | - pxor INC, STATE3 |
|---|
| 2805 | | - movdqu IV, 0x60(OUTP) |
|---|
| 2806 | 2766 | |
|---|
| 2807 | 2767 | movdqu 0x30(OUTP), INC |
|---|
| 2808 | 2768 | pxor INC, STATE4 |
|---|
| 2809 | 2769 | movdqu STATE4, 0x30(OUTP) |
|---|
| 2810 | 2770 | |
|---|
| 2811 | 2771 | _aesni_gf128mul_x_ble() |
|---|
| 2812 | | - movdqa IV, STATE4 |
|---|
| 2813 | | - movdqu 0x70(INP), INC |
|---|
| 2814 | | - pxor INC, STATE4 |
|---|
| 2815 | | - movdqu IV, 0x70(OUTP) |
|---|
| 2816 | 2772 | |
|---|
| 2817 | | - _aesni_gf128mul_x_ble() |
|---|
| 2773 | + add $64, INP |
|---|
| 2774 | + add $64, OUTP |
|---|
| 2775 | + sub $64, LEN |
|---|
| 2776 | + ja .Lxts_enc_loop4 |
|---|
| 2777 | + |
|---|
| 2818 | 2778 | movups IV, (IVP) |
|---|
| 2819 | 2779 | |
|---|
| 2820 | | - CALL_NOSPEC %r11 |
|---|
| 2780 | + FRAME_END |
|---|
| 2781 | + RET |
|---|
| 2782 | +SYM_FUNC_END(aesni_xts_encrypt) |
|---|
| 2821 | 2783 | |
|---|
| 2822 | | - movdqu 0x40(OUTP), INC |
|---|
| 2784 | +/* |
|---|
| 2785 | + * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, |
|---|
| 2786 | + * const u8 *src, unsigned int len, le128 *iv) |
|---|
| 2787 | + */ |
|---|
| 2788 | +SYM_FUNC_START(aesni_xts_decrypt) |
|---|
| 2789 | + FRAME_BEGIN |
|---|
| 2790 | + |
|---|
| 2791 | + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK |
|---|
| 2792 | + movups (IVP), IV |
|---|
| 2793 | + |
|---|
| 2794 | + mov 480(KEYP), KLEN |
|---|
| 2795 | + add $240, KEYP |
|---|
| 2796 | + |
|---|
| 2797 | +.Lxts_dec_loop4: |
|---|
| 2798 | + movdqa IV, STATE1 |
|---|
| 2799 | + movdqu 0x00(INP), INC |
|---|
| 2823 | 2800 | pxor INC, STATE1 |
|---|
| 2824 | | - movdqu STATE1, 0x40(OUTP) |
|---|
| 2801 | + movdqu IV, 0x00(OUTP) |
|---|
| 2825 | 2802 | |
|---|
| 2826 | | - movdqu 0x50(OUTP), INC |
|---|
| 2803 | + _aesni_gf128mul_x_ble() |
|---|
| 2804 | + movdqa IV, STATE2 |
|---|
| 2805 | + movdqu 0x10(INP), INC |
|---|
| 2827 | 2806 | pxor INC, STATE2 |
|---|
| 2828 | | - movdqu STATE2, 0x50(OUTP) |
|---|
| 2807 | + movdqu IV, 0x10(OUTP) |
|---|
| 2829 | 2808 | |
|---|
| 2830 | | - movdqu 0x60(OUTP), INC |
|---|
| 2809 | + _aesni_gf128mul_x_ble() |
|---|
| 2810 | + movdqa IV, STATE3 |
|---|
| 2811 | + movdqu 0x20(INP), INC |
|---|
| 2831 | 2812 | pxor INC, STATE3 |
|---|
| 2832 | | - movdqu STATE3, 0x60(OUTP) |
|---|
| 2813 | + movdqu IV, 0x20(OUTP) |
|---|
| 2833 | 2814 | |
|---|
| 2834 | | - movdqu 0x70(OUTP), INC |
|---|
| 2815 | + _aesni_gf128mul_x_ble() |
|---|
| 2816 | + movdqa IV, STATE4 |
|---|
| 2817 | + movdqu 0x30(INP), INC |
|---|
| 2835 | 2818 | pxor INC, STATE4 |
|---|
| 2836 | | - movdqu STATE4, 0x70(OUTP) |
|---|
| 2819 | + movdqu IV, 0x30(OUTP) |
|---|
| 2820 | + |
|---|
| 2821 | + call _aesni_dec4 |
|---|
| 2822 | + |
|---|
| 2823 | + movdqu 0x00(OUTP), INC |
|---|
| 2824 | + pxor INC, STATE1 |
|---|
| 2825 | + movdqu STATE1, 0x00(OUTP) |
|---|
| 2826 | + |
|---|
| 2827 | + movdqu 0x10(OUTP), INC |
|---|
| 2828 | + pxor INC, STATE2 |
|---|
| 2829 | + movdqu STATE2, 0x10(OUTP) |
|---|
| 2830 | + |
|---|
| 2831 | + movdqu 0x20(OUTP), INC |
|---|
| 2832 | + pxor INC, STATE3 |
|---|
| 2833 | + movdqu STATE3, 0x20(OUTP) |
|---|
| 2834 | + |
|---|
| 2835 | + movdqu 0x30(OUTP), INC |
|---|
| 2836 | + pxor INC, STATE4 |
|---|
| 2837 | + movdqu STATE4, 0x30(OUTP) |
|---|
| 2838 | + |
|---|
| 2839 | + _aesni_gf128mul_x_ble() |
|---|
| 2840 | + |
|---|
| 2841 | + add $64, INP |
|---|
| 2842 | + add $64, OUTP |
|---|
| 2843 | + sub $64, LEN |
|---|
| 2844 | + ja .Lxts_dec_loop4 |
|---|
| 2845 | + |
|---|
| 2846 | + movups IV, (IVP) |
|---|
| 2837 | 2847 | |
|---|
| 2838 | 2848 | FRAME_END |
|---|
| 2839 | | - ret |
|---|
| 2840 | | -ENDPROC(aesni_xts_crypt8) |
|---|
| 2849 | + RET |
|---|
| 2850 | +SYM_FUNC_END(aesni_xts_decrypt) |
|---|
| 2841 | 2851 | |
|---|
| 2842 | 2852 | #endif |
|---|