hc
2023-12-08 01573e231f18eb2d99162747186f59511f56b64d
kernel/tools/perf/arch/x86/tests/insn-x86-dat-src.c
....@@ -510,6 +510,82 @@
510510 asm volatile("vrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}");
511511 asm volatile("vrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}");
512512
513
+ /* AVX-512: Op code 0f 38 50 */
514
+
515
+ asm volatile("vpdpbusd %xmm1, %xmm2, %xmm3");
516
+ asm volatile("vpdpbusd %ymm1, %ymm2, %ymm3");
517
+ asm volatile("vpdpbusd %zmm1, %zmm2, %zmm3");
518
+ asm volatile("vpdpbusd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
519
+ asm volatile("vpdpbusd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
520
+
521
+ /* AVX-512: Op code 0f 38 51 */
522
+
523
+ asm volatile("vpdpbusds %xmm1, %xmm2, %xmm3");
524
+ asm volatile("vpdpbusds %ymm1, %ymm2, %ymm3");
525
+ asm volatile("vpdpbusds %zmm1, %zmm2, %zmm3");
526
+ asm volatile("vpdpbusds 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
527
+ asm volatile("vpdpbusds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
528
+
529
+ /* AVX-512: Op code 0f 38 52 */
530
+
531
+ asm volatile("vdpbf16ps %xmm1, %xmm2, %xmm3");
532
+ asm volatile("vdpbf16ps %ymm1, %ymm2, %ymm3");
533
+ asm volatile("vdpbf16ps %zmm1, %zmm2, %zmm3");
534
+ asm volatile("vdpbf16ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
535
+ asm volatile("vdpbf16ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
536
+
537
+ asm volatile("vpdpwssd %xmm1, %xmm2, %xmm3");
538
+ asm volatile("vpdpwssd %ymm1, %ymm2, %ymm3");
539
+ asm volatile("vpdpwssd %zmm1, %zmm2, %zmm3");
540
+ asm volatile("vpdpwssd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
541
+ asm volatile("vpdpwssd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
542
+
543
+ asm volatile("vp4dpwssd (%rax), %zmm0, %zmm4");
544
+ asm volatile("vp4dpwssd (%eax), %zmm0, %zmm4");
545
+ asm volatile("vp4dpwssd 0x12345678(%rax,%rcx,8),%zmm0,%zmm4");
546
+ asm volatile("vp4dpwssd 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
547
+
548
+ /* AVX-512: Op code 0f 38 53 */
549
+
550
+ asm volatile("vpdpwssds %xmm1, %xmm2, %xmm3");
551
+ asm volatile("vpdpwssds %ymm1, %ymm2, %ymm3");
552
+ asm volatile("vpdpwssds %zmm1, %zmm2, %zmm3");
553
+ asm volatile("vpdpwssds 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
554
+ asm volatile("vpdpwssds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
555
+
556
+ asm volatile("vp4dpwssds (%rax), %zmm0, %zmm4");
557
+ asm volatile("vp4dpwssds (%eax), %zmm0, %zmm4");
558
+ asm volatile("vp4dpwssds 0x12345678(%rax,%rcx,8),%zmm0,%zmm4");
559
+ asm volatile("vp4dpwssds 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
560
+
561
+ /* AVX-512: Op code 0f 38 54 */
562
+
563
+ asm volatile("vpopcntb %xmm1, %xmm2");
564
+ asm volatile("vpopcntb %ymm1, %ymm2");
565
+ asm volatile("vpopcntb %zmm1, %zmm2");
566
+ asm volatile("vpopcntb 0x12345678(%rax,%rcx,8),%zmm2");
567
+ asm volatile("vpopcntb 0x12345678(%eax,%ecx,8),%zmm2");
568
+
569
+ asm volatile("vpopcntw %xmm1, %xmm2");
570
+ asm volatile("vpopcntw %ymm1, %ymm2");
571
+ asm volatile("vpopcntw %zmm1, %zmm2");
572
+ asm volatile("vpopcntw 0x12345678(%rax,%rcx,8),%zmm2");
573
+ asm volatile("vpopcntw 0x12345678(%eax,%ecx,8),%zmm2");
574
+
575
+ /* AVX-512: Op code 0f 38 55 */
576
+
577
+ asm volatile("vpopcntd %xmm1, %xmm2");
578
+ asm volatile("vpopcntd %ymm1, %ymm2");
579
+ asm volatile("vpopcntd %zmm1, %zmm2");
580
+ asm volatile("vpopcntd 0x12345678(%rax,%rcx,8),%zmm2");
581
+ asm volatile("vpopcntd 0x12345678(%eax,%ecx,8),%zmm2");
582
+
583
+ asm volatile("vpopcntq %xmm1, %xmm2");
584
+ asm volatile("vpopcntq %ymm1, %ymm2");
585
+ asm volatile("vpopcntq %zmm1, %zmm2");
586
+ asm volatile("vpopcntq 0x12345678(%rax,%rcx,8),%zmm2");
587
+ asm volatile("vpopcntq 0x12345678(%eax,%ecx,8),%zmm2");
588
+
513589 /* AVX-512: Op code 0f 38 59 */
514590
515591 asm volatile("vpbroadcastq %xmm4,%xmm6");
....@@ -526,6 +602,34 @@
526602 asm volatile("vbroadcasti32x8 (%rcx),%zmm28");
527603 asm volatile("vbroadcasti64x4 (%rcx),%zmm26");
528604
605
+ /* AVX-512: Op code 0f 38 62 */
606
+
607
+ asm volatile("vpexpandb %xmm1, %xmm2");
608
+ asm volatile("vpexpandb %ymm1, %ymm2");
609
+ asm volatile("vpexpandb %zmm1, %zmm2");
610
+ asm volatile("vpexpandb 0x12345678(%rax,%rcx,8),%zmm2");
611
+ asm volatile("vpexpandb 0x12345678(%eax,%ecx,8),%zmm2");
612
+
613
+ asm volatile("vpexpandw %xmm1, %xmm2");
614
+ asm volatile("vpexpandw %ymm1, %ymm2");
615
+ asm volatile("vpexpandw %zmm1, %zmm2");
616
+ asm volatile("vpexpandw 0x12345678(%rax,%rcx,8),%zmm2");
617
+ asm volatile("vpexpandw 0x12345678(%eax,%ecx,8),%zmm2");
618
+
619
+ /* AVX-512: Op code 0f 38 63 */
620
+
621
+ asm volatile("vpcompressb %xmm1, %xmm2");
622
+ asm volatile("vpcompressb %ymm1, %ymm2");
623
+ asm volatile("vpcompressb %zmm1, %zmm2");
624
+ asm volatile("vpcompressb %zmm2,0x12345678(%rax,%rcx,8)");
625
+ asm volatile("vpcompressb %zmm2,0x12345678(%eax,%ecx,8)");
626
+
627
+ asm volatile("vpcompressw %xmm1, %xmm2");
628
+ asm volatile("vpcompressw %ymm1, %ymm2");
629
+ asm volatile("vpcompressw %zmm1, %zmm2");
630
+ asm volatile("vpcompressw %zmm2,0x12345678(%rax,%rcx,8)");
631
+ asm volatile("vpcompressw %zmm2,0x12345678(%eax,%ecx,8)");
632
+
529633 /* AVX-512: Op code 0f 38 64 */
530634
531635 asm volatile("vpblendmd %zmm26,%zmm27,%zmm28");
....@@ -540,6 +644,76 @@
540644
541645 asm volatile("vpblendmb %zmm26,%zmm27,%zmm28");
542646 asm volatile("vpblendmw %zmm26,%zmm27,%zmm28");
647
+
648
+ /* AVX-512: Op code 0f 38 68 */
649
+
650
+ asm volatile("vp2intersectd %xmm1, %xmm2, %k3");
651
+ asm volatile("vp2intersectd %ymm1, %ymm2, %k3");
652
+ asm volatile("vp2intersectd %zmm1, %zmm2, %k3");
653
+ asm volatile("vp2intersectd 0x12345678(%rax,%rcx,8),%zmm2,%k3");
654
+ asm volatile("vp2intersectd 0x12345678(%eax,%ecx,8),%zmm2,%k3");
655
+
656
+ asm volatile("vp2intersectq %xmm1, %xmm2, %k3");
657
+ asm volatile("vp2intersectq %ymm1, %ymm2, %k3");
658
+ asm volatile("vp2intersectq %zmm1, %zmm2, %k3");
659
+ asm volatile("vp2intersectq 0x12345678(%rax,%rcx,8),%zmm2,%k3");
660
+ asm volatile("vp2intersectq 0x12345678(%eax,%ecx,8),%zmm2,%k3");
661
+
662
+ /* AVX-512: Op code 0f 38 70 */
663
+
664
+ asm volatile("vpshldvw %xmm1, %xmm2, %xmm3");
665
+ asm volatile("vpshldvw %ymm1, %ymm2, %ymm3");
666
+ asm volatile("vpshldvw %zmm1, %zmm2, %zmm3");
667
+ asm volatile("vpshldvw 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
668
+ asm volatile("vpshldvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
669
+
670
+ /* AVX-512: Op code 0f 38 71 */
671
+
672
+ asm volatile("vpshldvd %xmm1, %xmm2, %xmm3");
673
+ asm volatile("vpshldvd %ymm1, %ymm2, %ymm3");
674
+ asm volatile("vpshldvd %zmm1, %zmm2, %zmm3");
675
+ asm volatile("vpshldvd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
676
+ asm volatile("vpshldvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
677
+
678
+ asm volatile("vpshldvq %xmm1, %xmm2, %xmm3");
679
+ asm volatile("vpshldvq %ymm1, %ymm2, %ymm3");
680
+ asm volatile("vpshldvq %zmm1, %zmm2, %zmm3");
681
+ asm volatile("vpshldvq 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
682
+ asm volatile("vpshldvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
683
+
684
+ /* AVX-512: Op code 0f 38 72 */
685
+
686
+ asm volatile("vcvtne2ps2bf16 %xmm1, %xmm2, %xmm3");
687
+ asm volatile("vcvtne2ps2bf16 %ymm1, %ymm2, %ymm3");
688
+ asm volatile("vcvtne2ps2bf16 %zmm1, %zmm2, %zmm3");
689
+ asm volatile("vcvtne2ps2bf16 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
690
+ asm volatile("vcvtne2ps2bf16 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
691
+
692
+ asm volatile("vcvtneps2bf16 %xmm1, %xmm2");
693
+ asm volatile("vcvtneps2bf16 %ymm1, %xmm2");
694
+ asm volatile("vcvtneps2bf16 %zmm1, %ymm2");
695
+ asm volatile("vcvtneps2bf16 0x12345678(%rax,%rcx,8),%ymm2");
696
+ asm volatile("vcvtneps2bf16 0x12345678(%eax,%ecx,8),%ymm2");
697
+
698
+ asm volatile("vpshrdvw %xmm1, %xmm2, %xmm3");
699
+ asm volatile("vpshrdvw %ymm1, %ymm2, %ymm3");
700
+ asm volatile("vpshrdvw %zmm1, %zmm2, %zmm3");
701
+ asm volatile("vpshrdvw 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
702
+ asm volatile("vpshrdvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
703
+
704
+ /* AVX-512: Op code 0f 38 73 */
705
+
706
+ asm volatile("vpshrdvd %xmm1, %xmm2, %xmm3");
707
+ asm volatile("vpshrdvd %ymm1, %ymm2, %ymm3");
708
+ asm volatile("vpshrdvd %zmm1, %zmm2, %zmm3");
709
+ asm volatile("vpshrdvd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
710
+ asm volatile("vpshrdvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
711
+
712
+ asm volatile("vpshrdvq %xmm1, %xmm2, %xmm3");
713
+ asm volatile("vpshrdvq %ymm1, %ymm2, %ymm3");
714
+ asm volatile("vpshrdvq %zmm1, %zmm2, %zmm3");
715
+ asm volatile("vpshrdvq 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
716
+ asm volatile("vpshrdvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
543717
544718 /* AVX-512: Op code 0f 38 75 */
545719
....@@ -613,6 +787,14 @@
613787 asm volatile("vpermb %zmm26,%zmm27,%zmm28");
614788 asm volatile("vpermw %zmm26,%zmm27,%zmm28");
615789
790
+ /* AVX-512: Op code 0f 38 8f */
791
+
792
+ asm volatile("vpshufbitqmb %xmm1, %xmm2, %k3");
793
+ asm volatile("vpshufbitqmb %ymm1, %ymm2, %k3");
794
+ asm volatile("vpshufbitqmb %zmm1, %zmm2, %k3");
795
+ asm volatile("vpshufbitqmb 0x12345678(%rax,%rcx,8),%zmm2,%k3");
796
+ asm volatile("vpshufbitqmb 0x12345678(%eax,%ecx,8),%zmm2,%k3");
797
+
616798 /* AVX-512: Op code 0f 38 90 */
617799
618800 asm volatile("vpgatherdd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
....@@ -626,6 +808,40 @@
626808 asm volatile("vpgatherqq %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
627809 asm volatile("vpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}");
628810 asm volatile("vpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
811
+
812
+ /* AVX-512: Op code 0f 38 9a */
813
+
814
+ asm volatile("vfmsub132ps %xmm1, %xmm2, %xmm3");
815
+ asm volatile("vfmsub132ps %ymm1, %ymm2, %ymm3");
816
+ asm volatile("vfmsub132ps %zmm1, %zmm2, %zmm3");
817
+ asm volatile("vfmsub132ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
818
+ asm volatile("vfmsub132ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
819
+
820
+ asm volatile("vfmsub132pd %xmm1, %xmm2, %xmm3");
821
+ asm volatile("vfmsub132pd %ymm1, %ymm2, %ymm3");
822
+ asm volatile("vfmsub132pd %zmm1, %zmm2, %zmm3");
823
+ asm volatile("vfmsub132pd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
824
+ asm volatile("vfmsub132pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
825
+
826
+ asm volatile("v4fmaddps (%rax), %zmm0, %zmm4");
827
+ asm volatile("v4fmaddps (%eax), %zmm0, %zmm4");
828
+ asm volatile("v4fmaddps 0x12345678(%rax,%rcx,8),%zmm0,%zmm4");
829
+ asm volatile("v4fmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
830
+
831
+ /* AVX-512: Op code 0f 38 9b */
832
+
833
+ asm volatile("vfmsub132ss %xmm1, %xmm2, %xmm3");
834
+ asm volatile("vfmsub132ss 0x12345678(%rax,%rcx,8),%xmm2,%xmm3");
835
+ asm volatile("vfmsub132ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
836
+
837
+ asm volatile("vfmsub132sd %xmm1, %xmm2, %xmm3");
838
+ asm volatile("vfmsub132sd 0x12345678(%rax,%rcx,8),%xmm2,%xmm3");
839
+ asm volatile("vfmsub132sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
840
+
841
+ asm volatile("v4fmaddss (%rax), %xmm0, %xmm4");
842
+ asm volatile("v4fmaddss (%eax), %xmm0, %xmm4");
843
+ asm volatile("v4fmaddss 0x12345678(%rax,%rcx,8),%xmm0,%xmm4");
844
+ asm volatile("v4fmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4");
629845
630846 /* AVX-512: Op code 0f 38 a0 */
631847
....@@ -646,6 +862,40 @@
646862
647863 asm volatile("vscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
648864 asm volatile("vscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
865
+
866
+ /* AVX-512: Op code 0f 38 aa */
867
+
868
+ asm volatile("vfmsub213ps %xmm1, %xmm2, %xmm3");
869
+ asm volatile("vfmsub213ps %ymm1, %ymm2, %ymm3");
870
+ asm volatile("vfmsub213ps %zmm1, %zmm2, %zmm3");
871
+ asm volatile("vfmsub213ps 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
872
+ asm volatile("vfmsub213ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
873
+
874
+ asm volatile("vfmsub213pd %xmm1, %xmm2, %xmm3");
875
+ asm volatile("vfmsub213pd %ymm1, %ymm2, %ymm3");
876
+ asm volatile("vfmsub213pd %zmm1, %zmm2, %zmm3");
877
+ asm volatile("vfmsub213pd 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
878
+ asm volatile("vfmsub213pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
879
+
880
+ asm volatile("v4fnmaddps (%rax), %zmm0, %zmm4");
881
+ asm volatile("v4fnmaddps (%eax), %zmm0, %zmm4");
882
+ asm volatile("v4fnmaddps 0x12345678(%rax,%rcx,8),%zmm0,%zmm4");
883
+ asm volatile("v4fnmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
884
+
885
+ /* AVX-512: Op code 0f 38 ab */
886
+
887
+ asm volatile("vfmsub213ss %xmm1, %xmm2, %xmm3");
888
+ asm volatile("vfmsub213ss 0x12345678(%rax,%rcx,8),%xmm2,%xmm3");
889
+ asm volatile("vfmsub213ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
890
+
891
+ asm volatile("vfmsub213sd %xmm1, %xmm2, %xmm3");
892
+ asm volatile("vfmsub213sd 0x12345678(%rax,%rcx,8),%xmm2,%xmm3");
893
+ asm volatile("vfmsub213sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
894
+
895
+ asm volatile("v4fnmaddss (%rax), %xmm0, %xmm4");
896
+ asm volatile("v4fnmaddss (%eax), %xmm0, %xmm4");
897
+ asm volatile("v4fnmaddss 0x12345678(%rax,%rcx,8),%xmm0,%xmm4");
898
+ asm volatile("v4fnmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4");
649899
650900 /* AVX-512: Op code 0f 38 b4 */
651901
....@@ -684,6 +934,50 @@
684934
685935 asm volatile("vrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}");
686936 asm volatile("vrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}");
937
+
938
+ /* AVX-512: Op code 0f 38 cf */
939
+
940
+ asm volatile("gf2p8mulb %xmm1, %xmm3");
941
+ asm volatile("gf2p8mulb 0x12345678(%rax,%rcx,8),%xmm3");
942
+ asm volatile("gf2p8mulb 0x12345678(%eax,%ecx,8),%xmm3");
943
+
944
+ asm volatile("vgf2p8mulb %xmm1, %xmm2, %xmm3");
945
+ asm volatile("vgf2p8mulb %ymm1, %ymm2, %ymm3");
946
+ asm volatile("vgf2p8mulb %zmm1, %zmm2, %zmm3");
947
+ asm volatile("vgf2p8mulb 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
948
+ asm volatile("vgf2p8mulb 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
949
+
950
+ /* AVX-512: Op code 0f 38 dc */
951
+
952
+ asm volatile("vaesenc %xmm1, %xmm2, %xmm3");
953
+ asm volatile("vaesenc %ymm1, %ymm2, %ymm3");
954
+ asm volatile("vaesenc %zmm1, %zmm2, %zmm3");
955
+ asm volatile("vaesenc 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
956
+ asm volatile("vaesenc 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
957
+
958
+ /* AVX-512: Op code 0f 38 dd */
959
+
960
+ asm volatile("vaesenclast %xmm1, %xmm2, %xmm3");
961
+ asm volatile("vaesenclast %ymm1, %ymm2, %ymm3");
962
+ asm volatile("vaesenclast %zmm1, %zmm2, %zmm3");
963
+ asm volatile("vaesenclast 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
964
+ asm volatile("vaesenclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
965
+
966
+ /* AVX-512: Op code 0f 38 de */
967
+
968
+ asm volatile("vaesdec %xmm1, %xmm2, %xmm3");
969
+ asm volatile("vaesdec %ymm1, %ymm2, %ymm3");
970
+ asm volatile("vaesdec %zmm1, %zmm2, %zmm3");
971
+ asm volatile("vaesdec 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
972
+ asm volatile("vaesdec 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
973
+
974
+ /* AVX-512: Op code 0f 38 df */
975
+
976
+ asm volatile("vaesdeclast %xmm1, %xmm2, %xmm3");
977
+ asm volatile("vaesdeclast %ymm1, %ymm2, %ymm3");
978
+ asm volatile("vaesdeclast %zmm1, %zmm2, %zmm3");
979
+ asm volatile("vaesdeclast 0x12345678(%rax,%rcx,8),%zmm2,%zmm3");
980
+ asm volatile("vaesdeclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
687981
688982 /* AVX-512: Op code 0f 3a 03 */
689983
....@@ -804,6 +1098,13 @@
8041098 asm volatile("vshufi32x4 $0x12,%zmm25,%zmm26,%zmm27");
8051099 asm volatile("vshufi64x2 $0x12,%zmm28,%zmm29,%zmm30");
8061100
1101
+ /* AVX-512: Op code 0f 3a 44 */
1102
+
1103
+ asm volatile("vpclmulqdq $0x12,%xmm1,%xmm2,%xmm3");
1104
+ asm volatile("vpclmulqdq $0x12,%ymm1,%ymm2,%ymm3");
1105
+ asm volatile("vpclmulqdq $0x12,%zmm1,%zmm2,%zmm3");
1106
+ asm volatile("vpclmulqdq $0x12,%zmm25,%zmm26,%zmm27");
1107
+
8071108 /* AVX-512: Op code 0f 3a 50 */
8081109
8091110 asm volatile("vrangeps $0x12,%zmm25,%zmm26,%zmm27");
....@@ -843,6 +1144,62 @@
8431144
8441145 asm volatile("vfpclassss $0x12,%xmm27,%k5");
8451146 asm volatile("vfpclasssd $0x12,%xmm30,%k5");
1147
+
1148
+ /* AVX-512: Op code 0f 3a 70 */
1149
+
1150
+ asm volatile("vpshldw $0x12,%xmm1,%xmm2,%xmm3");
1151
+ asm volatile("vpshldw $0x12,%ymm1,%ymm2,%ymm3");
1152
+ asm volatile("vpshldw $0x12,%zmm1,%zmm2,%zmm3");
1153
+ asm volatile("vpshldw $0x12,%zmm25,%zmm26,%zmm27");
1154
+
1155
+ /* AVX-512: Op code 0f 3a 71 */
1156
+
1157
+ asm volatile("vpshldd $0x12,%xmm1,%xmm2,%xmm3");
1158
+ asm volatile("vpshldd $0x12,%ymm1,%ymm2,%ymm3");
1159
+ asm volatile("vpshldd $0x12,%zmm1,%zmm2,%zmm3");
1160
+ asm volatile("vpshldd $0x12,%zmm25,%zmm26,%zmm27");
1161
+
1162
+ asm volatile("vpshldq $0x12,%xmm1,%xmm2,%xmm3");
1163
+ asm volatile("vpshldq $0x12,%ymm1,%ymm2,%ymm3");
1164
+ asm volatile("vpshldq $0x12,%zmm1,%zmm2,%zmm3");
1165
+ asm volatile("vpshldq $0x12,%zmm25,%zmm26,%zmm27");
1166
+
1167
+ /* AVX-512: Op code 0f 3a 72 */
1168
+
1169
+ asm volatile("vpshrdw $0x12,%xmm1,%xmm2,%xmm3");
1170
+ asm volatile("vpshrdw $0x12,%ymm1,%ymm2,%ymm3");
1171
+ asm volatile("vpshrdw $0x12,%zmm1,%zmm2,%zmm3");
1172
+ asm volatile("vpshrdw $0x12,%zmm25,%zmm26,%zmm27");
1173
+
1174
+ /* AVX-512: Op code 0f 3a 73 */
1175
+
1176
+ asm volatile("vpshrdd $0x12,%xmm1,%xmm2,%xmm3");
1177
+ asm volatile("vpshrdd $0x12,%ymm1,%ymm2,%ymm3");
1178
+ asm volatile("vpshrdd $0x12,%zmm1,%zmm2,%zmm3");
1179
+ asm volatile("vpshrdd $0x12,%zmm25,%zmm26,%zmm27");
1180
+
1181
+ asm volatile("vpshrdq $0x12,%xmm1,%xmm2,%xmm3");
1182
+ asm volatile("vpshrdq $0x12,%ymm1,%ymm2,%ymm3");
1183
+ asm volatile("vpshrdq $0x12,%zmm1,%zmm2,%zmm3");
1184
+ asm volatile("vpshrdq $0x12,%zmm25,%zmm26,%zmm27");
1185
+
1186
+ /* AVX-512: Op code 0f 3a ce */
1187
+
1188
+ asm volatile("gf2p8affineqb $0x12,%xmm1,%xmm3");
1189
+
1190
+ asm volatile("vgf2p8affineqb $0x12,%xmm1,%xmm2,%xmm3");
1191
+ asm volatile("vgf2p8affineqb $0x12,%ymm1,%ymm2,%ymm3");
1192
+ asm volatile("vgf2p8affineqb $0x12,%zmm1,%zmm2,%zmm3");
1193
+ asm volatile("vgf2p8affineqb $0x12,%zmm25,%zmm26,%zmm27");
1194
+
1195
+ /* AVX-512: Op code 0f 3a cf */
1196
+
1197
+ asm volatile("gf2p8affineinvqb $0x12,%xmm1,%xmm3");
1198
+
1199
+ asm volatile("vgf2p8affineinvqb $0x12,%xmm1,%xmm2,%xmm3");
1200
+ asm volatile("vgf2p8affineinvqb $0x12,%ymm1,%ymm2,%ymm3");
1201
+ asm volatile("vgf2p8affineinvqb $0x12,%zmm1,%zmm2,%zmm3");
1202
+ asm volatile("vgf2p8affineinvqb $0x12,%zmm25,%zmm26,%zmm27");
8461203
8471204 /* AVX-512: Op code 0f 72 (Grp13) */
8481205
....@@ -1320,6 +1677,14 @@
13201677 asm volatile("xsaveopt (%r8)");
13211678 asm volatile("mfence");
13221679
1680
+ /* cldemote m8 */
1681
+
1682
+ asm volatile("cldemote (%rax)");
1683
+ asm volatile("cldemote (%r8)");
1684
+ asm volatile("cldemote (0x12345678)");
1685
+ asm volatile("cldemote 0x12345678(%rax,%rcx,8)");
1686
+ asm volatile("cldemote 0x12345678(%r8,%rcx,8)");
1687
+
13231688 /* xsavec mem */
13241689
13251690 asm volatile("xsavec (%rax)");
....@@ -1363,6 +1728,187 @@
13631728 asm volatile("ptwriteq (0x12345678)");
13641729 asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)");
13651730 asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)");
1731
+
1732
+ /* tpause */
1733
+
1734
+ asm volatile("tpause %ebx");
1735
+ asm volatile("tpause %r8d");
1736
+
1737
+ /* umonitor */
1738
+
1739
+ asm volatile("umonitor %eax");
1740
+ asm volatile("umonitor %rax");
1741
+ asm volatile("umonitor %r8d");
1742
+
1743
+ /* umwait */
1744
+
1745
+ asm volatile("umwait %eax");
1746
+ asm volatile("umwait %r8d");
1747
+
1748
+ /* movdiri */
1749
+
1750
+ asm volatile("movdiri %rax,(%rbx)");
1751
+ asm volatile("movdiri %rcx,0x12345678(%rax)");
1752
+
1753
+ /* movdir64b */
1754
+
1755
+ asm volatile("movdir64b (%rax),%rbx");
1756
+ asm volatile("movdir64b 0x12345678(%rax),%rcx");
1757
+ asm volatile("movdir64b (%eax),%ebx");
1758
+ asm volatile("movdir64b 0x12345678(%eax),%ecx");
1759
+
1760
+ /* enqcmd */
1761
+
1762
+ asm volatile("enqcmd (%rax),%rbx");
1763
+ asm volatile("enqcmd 0x12345678(%rax),%rcx");
1764
+ asm volatile("enqcmd (%eax),%ebx");
1765
+ asm volatile("enqcmd 0x12345678(%eax),%ecx");
1766
+
1767
+ /* enqcmds */
1768
+
1769
+ asm volatile("enqcmds (%rax),%rbx");
1770
+ asm volatile("enqcmds 0x12345678(%rax),%rcx");
1771
+ asm volatile("enqcmds (%eax),%ebx");
1772
+ asm volatile("enqcmds 0x12345678(%eax),%ecx");
1773
+
1774
+ /* incsspd/q */
1775
+
1776
+ asm volatile("incsspd %eax");
1777
+ asm volatile("incsspd %r8d");
1778
+ asm volatile("incsspq %rax");
1779
+ asm volatile("incsspq %r8");
1780
+ /* Also check instructions in the same group encoding as incsspd/q */
1781
+ asm volatile("xrstor (%rax)");
1782
+ asm volatile("xrstor (%r8)");
1783
+ asm volatile("xrstor (0x12345678)");
1784
+ asm volatile("xrstor 0x12345678(%rax,%rcx,8)");
1785
+ asm volatile("xrstor 0x12345678(%r8,%rcx,8)");
1786
+ asm volatile("lfence");
1787
+
1788
+ /* rdsspd/q */
1789
+
1790
+ asm volatile("rdsspd %eax");
1791
+ asm volatile("rdsspd %r8d");
1792
+ asm volatile("rdsspq %rax");
1793
+ asm volatile("rdsspq %r8");
1794
+
1795
+ /* saveprevssp */
1796
+
1797
+ asm volatile("saveprevssp");
1798
+
1799
+ /* rstorssp */
1800
+
1801
+ asm volatile("rstorssp (%rax)");
1802
+ asm volatile("rstorssp (%r8)");
1803
+ asm volatile("rstorssp (0x12345678)");
1804
+ asm volatile("rstorssp 0x12345678(%rax,%rcx,8)");
1805
+ asm volatile("rstorssp 0x12345678(%r8,%rcx,8)");
1806
+
1807
+ /* wrssd/q */
1808
+
1809
+ asm volatile("wrssd %ecx,(%rax)");
1810
+ asm volatile("wrssd %edx,(%r8)");
1811
+ asm volatile("wrssd %edx,(0x12345678)");
1812
+ asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)");
1813
+ asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)");
1814
+ asm volatile("wrssq %rcx,(%rax)");
1815
+ asm volatile("wrssq %rdx,(%r8)");
1816
+ asm volatile("wrssq %rdx,(0x12345678)");
1817
+ asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)");
1818
+ asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)");
1819
+
1820
+ /* wrussd/q */
1821
+
1822
+ asm volatile("wrussd %ecx,(%rax)");
1823
+ asm volatile("wrussd %edx,(%r8)");
1824
+ asm volatile("wrussd %edx,(0x12345678)");
1825
+ asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)");
1826
+ asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)");
1827
+ asm volatile("wrussq %rcx,(%rax)");
1828
+ asm volatile("wrussq %rdx,(%r8)");
1829
+ asm volatile("wrussq %rdx,(0x12345678)");
1830
+ asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)");
1831
+ asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)");
1832
+
1833
+ /* setssbsy */
1834
+
1835
+ asm volatile("setssbsy");
1836
+ /* Also check instructions in the same group encoding as setssbsy */
1837
+ asm volatile("rdpkru");
1838
+ asm volatile("wrpkru");
1839
+
1840
+ /* clrssbsy */
1841
+
1842
+ asm volatile("clrssbsy (%rax)");
1843
+ asm volatile("clrssbsy (%r8)");
1844
+ asm volatile("clrssbsy (0x12345678)");
1845
+ asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)");
1846
+ asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)");
1847
+
1848
+ /* endbr32/64 */
1849
+
1850
+ asm volatile("endbr32");
1851
+ asm volatile("endbr64");
1852
+
1853
+ /* call with/without notrack prefix */
1854
+
1855
+ asm volatile("callq *%rax"); /* Expecting: call indirect 0 */
1856
+ asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */
1857
+ asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */
1858
+ asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */
1859
+ asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
1860
+ asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
1861
+
1862
+ asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */
1863
+ asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */
1864
+ asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */
1865
+ asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */
1866
+ asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
1867
+ asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
1868
+
1869
+ asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */
1870
+ asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */
1871
+ asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */
1872
+ asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */
1873
+ asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
1874
+ asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
1875
+
1876
+ asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */
1877
+ asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */
1878
+ asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */
1879
+ asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */
1880
+ asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
1881
+ asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
1882
+
1883
+ /* jmp with/without notrack prefix */
1884
+
1885
+ asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */
1886
+ asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
1887
+ asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
1888
+ asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
1889
+ asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
1890
+ asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
1891
+
1892
+ asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */
1893
+ asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
1894
+ asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
1895
+ asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
1896
+ asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
1897
+ asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
1898
+
1899
+ asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */
1900
+ asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
1901
+ asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
1902
+ asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
1903
+ asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
1904
+ asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
1905
+
1906
+ asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */
1907
+ asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
1908
+ asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
1909
+ asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
1910
+ asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
1911
+ asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
13661912
13671913 #else /* #ifdef __x86_64__ */
13681914
....@@ -1896,6 +2442,69 @@
18962442 asm volatile("vrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}");
18972443 asm volatile("vrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}");
18982444
2445
+ /* AVX-512: Op code 0f 38 50 */
2446
+
2447
+ asm volatile("vpdpbusd %xmm1, %xmm2, %xmm3");
2448
+ asm volatile("vpdpbusd %ymm1, %ymm2, %ymm3");
2449
+ asm volatile("vpdpbusd %zmm1, %zmm2, %zmm3");
2450
+ asm volatile("vpdpbusd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2451
+
2452
+ /* AVX-512: Op code 0f 38 51 */
2453
+
2454
+ asm volatile("vpdpbusds %xmm1, %xmm2, %xmm3");
2455
+ asm volatile("vpdpbusds %ymm1, %ymm2, %ymm3");
2456
+ asm volatile("vpdpbusds %zmm1, %zmm2, %zmm3");
2457
+ asm volatile("vpdpbusds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2458
+
2459
+ /* AVX-512: Op code 0f 38 52 */
2460
+
2461
+ asm volatile("vdpbf16ps %xmm1, %xmm2, %xmm3");
2462
+ asm volatile("vdpbf16ps %ymm1, %ymm2, %ymm3");
2463
+ asm volatile("vdpbf16ps %zmm1, %zmm2, %zmm3");
2464
+ asm volatile("vdpbf16ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2465
+
2466
+ asm volatile("vpdpwssd %xmm1, %xmm2, %xmm3");
2467
+ asm volatile("vpdpwssd %ymm1, %ymm2, %ymm3");
2468
+ asm volatile("vpdpwssd %zmm1, %zmm2, %zmm3");
2469
+ asm volatile("vpdpwssd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2470
+
2471
+ asm volatile("vp4dpwssd (%eax), %zmm0, %zmm4");
2472
+ asm volatile("vp4dpwssd 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
2473
+
2474
+ /* AVX-512: Op code 0f 38 53 */
2475
+
2476
+ asm volatile("vpdpwssds %xmm1, %xmm2, %xmm3");
2477
+ asm volatile("vpdpwssds %ymm1, %ymm2, %ymm3");
2478
+ asm volatile("vpdpwssds %zmm1, %zmm2, %zmm3");
2479
+ asm volatile("vpdpwssds 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2480
+
2481
+ asm volatile("vp4dpwssds (%eax), %zmm0, %zmm4");
2482
+ asm volatile("vp4dpwssds 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
2483
+
2484
+ /* AVX-512: Op code 0f 38 54 */
2485
+
2486
+ asm volatile("vpopcntb %xmm1, %xmm2");
2487
+ asm volatile("vpopcntb %ymm1, %ymm2");
2488
+ asm volatile("vpopcntb %zmm1, %zmm2");
2489
+ asm volatile("vpopcntb 0x12345678(%eax,%ecx,8),%zmm2");
2490
+
2491
+ asm volatile("vpopcntw %xmm1, %xmm2");
2492
+ asm volatile("vpopcntw %ymm1, %ymm2");
2493
+ asm volatile("vpopcntw %zmm1, %zmm2");
2494
+ asm volatile("vpopcntw 0x12345678(%eax,%ecx,8),%zmm2");
2495
+
2496
+ /* AVX-512: Op code 0f 38 55 */
2497
+
2498
+ asm volatile("vpopcntd %xmm1, %xmm2");
2499
+ asm volatile("vpopcntd %ymm1, %ymm2");
2500
+ asm volatile("vpopcntd %zmm1, %zmm2");
2501
+ asm volatile("vpopcntd 0x12345678(%eax,%ecx,8),%zmm2");
2502
+
2503
+ asm volatile("vpopcntq %xmm1, %xmm2");
2504
+ asm volatile("vpopcntq %ymm1, %ymm2");
2505
+ asm volatile("vpopcntq %zmm1, %zmm2");
2506
+ asm volatile("vpopcntq 0x12345678(%eax,%ecx,8),%zmm2");
2507
+
18992508 /* AVX-512: Op code 0f 38 59 */
19002509
19012510 asm volatile("vpbroadcastq %xmm4,%xmm6");
....@@ -1912,6 +2521,30 @@
19122521 asm volatile("vbroadcasti32x8 (%ecx),%zmm6");
19132522 asm volatile("vbroadcasti64x4 (%ecx),%zmm6");
19142523
2524
+ /* AVX-512: Op code 0f 38 62 */
2525
+
2526
+ asm volatile("vpexpandb %xmm1, %xmm2");
2527
+ asm volatile("vpexpandb %ymm1, %ymm2");
2528
+ asm volatile("vpexpandb %zmm1, %zmm2");
2529
+ asm volatile("vpexpandb 0x12345678(%eax,%ecx,8),%zmm2");
2530
+
2531
+ asm volatile("vpexpandw %xmm1, %xmm2");
2532
+ asm volatile("vpexpandw %ymm1, %ymm2");
2533
+ asm volatile("vpexpandw %zmm1, %zmm2");
2534
+ asm volatile("vpexpandw 0x12345678(%eax,%ecx,8),%zmm2");
2535
+
2536
+ /* AVX-512: Op code 0f 38 63 */
2537
+
2538
+ asm volatile("vpcompressb %xmm1, %xmm2");
2539
+ asm volatile("vpcompressb %ymm1, %ymm2");
2540
+ asm volatile("vpcompressb %zmm1, %zmm2");
2541
+ asm volatile("vpcompressb %zmm2,0x12345678(%eax,%ecx,8)");
2542
+
2543
+ asm volatile("vpcompressw %xmm1, %xmm2");
2544
+ asm volatile("vpcompressw %ymm1, %ymm2");
2545
+ asm volatile("vpcompressw %zmm1, %zmm2");
2546
+ asm volatile("vpcompressw %zmm2,0x12345678(%eax,%ecx,8)");
2547
+
19152548 /* AVX-512: Op code 0f 38 64 */
19162549
19172550 asm volatile("vpblendmd %zmm4,%zmm5,%zmm6");
....@@ -1926,6 +2559,66 @@
19262559
19272560 asm volatile("vpblendmb %zmm4,%zmm5,%zmm6");
19282561 asm volatile("vpblendmw %zmm4,%zmm5,%zmm6");
2562
+
2563
+ /* AVX-512: Op code 0f 38 68 */
2564
+
2565
+ asm volatile("vp2intersectd %xmm1, %xmm2, %k3");
2566
+ asm volatile("vp2intersectd %ymm1, %ymm2, %k3");
2567
+ asm volatile("vp2intersectd %zmm1, %zmm2, %k3");
2568
+ asm volatile("vp2intersectd 0x12345678(%eax,%ecx,8),%zmm2,%k3");
2569
+
2570
+ asm volatile("vp2intersectq %xmm1, %xmm2, %k3");
2571
+ asm volatile("vp2intersectq %ymm1, %ymm2, %k3");
2572
+ asm volatile("vp2intersectq %zmm1, %zmm2, %k3");
2573
+ asm volatile("vp2intersectq 0x12345678(%eax,%ecx,8),%zmm2,%k3");
2574
+
2575
+ /* AVX-512: Op code 0f 38 70 */
2576
+
2577
+ asm volatile("vpshldvw %xmm1, %xmm2, %xmm3");
2578
+ asm volatile("vpshldvw %ymm1, %ymm2, %ymm3");
2579
+ asm volatile("vpshldvw %zmm1, %zmm2, %zmm3");
2580
+ asm volatile("vpshldvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2581
+
2582
+ /* AVX-512: Op code 0f 38 71 */
2583
+
2584
+ asm volatile("vpshldvd %xmm1, %xmm2, %xmm3");
2585
+ asm volatile("vpshldvd %ymm1, %ymm2, %ymm3");
2586
+ asm volatile("vpshldvd %zmm1, %zmm2, %zmm3");
2587
+ asm volatile("vpshldvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2588
+
2589
+ asm volatile("vpshldvq %xmm1, %xmm2, %xmm3");
2590
+ asm volatile("vpshldvq %ymm1, %ymm2, %ymm3");
2591
+ asm volatile("vpshldvq %zmm1, %zmm2, %zmm3");
2592
+ asm volatile("vpshldvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2593
+
2594
+ /* AVX-512: Op code 0f 38 72 */
2595
+
2596
+ asm volatile("vcvtne2ps2bf16 %xmm1, %xmm2, %xmm3");
2597
+ asm volatile("vcvtne2ps2bf16 %ymm1, %ymm2, %ymm3");
2598
+ asm volatile("vcvtne2ps2bf16 %zmm1, %zmm2, %zmm3");
2599
+ asm volatile("vcvtne2ps2bf16 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2600
+
2601
+ asm volatile("vcvtneps2bf16 %xmm1, %xmm2");
2602
+ asm volatile("vcvtneps2bf16 %ymm1, %xmm2");
2603
+ asm volatile("vcvtneps2bf16 %zmm1, %ymm2");
2604
+ asm volatile("vcvtneps2bf16 0x12345678(%eax,%ecx,8),%ymm2");
2605
+
2606
+ asm volatile("vpshrdvw %xmm1, %xmm2, %xmm3");
2607
+ asm volatile("vpshrdvw %ymm1, %ymm2, %ymm3");
2608
+ asm volatile("vpshrdvw %zmm1, %zmm2, %zmm3");
2609
+ asm volatile("vpshrdvw 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2610
+
2611
+ /* AVX-512: Op code 0f 38 73 */
2612
+
2613
+ asm volatile("vpshrdvd %xmm1, %xmm2, %xmm3");
2614
+ asm volatile("vpshrdvd %ymm1, %ymm2, %ymm3");
2615
+ asm volatile("vpshrdvd %zmm1, %zmm2, %zmm3");
2616
+ asm volatile("vpshrdvd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2617
+
2618
+ asm volatile("vpshrdvq %xmm1, %xmm2, %xmm3");
2619
+ asm volatile("vpshrdvq %ymm1, %ymm2, %ymm3");
2620
+ asm volatile("vpshrdvq %zmm1, %zmm2, %zmm3");
2621
+ asm volatile("vpshrdvq 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
19292622
19302623 /* AVX-512: Op code 0f 38 75 */
19312624
....@@ -1998,6 +2691,13 @@
19982691 asm volatile("vpermb %zmm4,%zmm5,%zmm6");
19992692 asm volatile("vpermw %zmm4,%zmm5,%zmm6");
20002693
2694
+ /* AVX-512: Op code 0f 38 8f */
2695
+
2696
+ asm volatile("vpshufbitqmb %xmm1, %xmm2, %k3");
2697
+ asm volatile("vpshufbitqmb %ymm1, %ymm2, %k3");
2698
+ asm volatile("vpshufbitqmb %zmm1, %zmm2, %k3");
2699
+ asm volatile("vpshufbitqmb 0x12345678(%eax,%ecx,8),%zmm2,%k3");
2700
+
20012701 /* AVX-512: Op code 0f 38 90 */
20022702
20032703 asm volatile("vpgatherdd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
....@@ -2011,6 +2711,32 @@
20112711 asm volatile("vpgatherqq %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
20122712 asm volatile("vpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}");
20132713 asm volatile("vpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
2714
+
2715
+ /* AVX-512: Op code 0f 38 9a */
2716
+
2717
+ asm volatile("vfmsub132ps %xmm1, %xmm2, %xmm3");
2718
+ asm volatile("vfmsub132ps %ymm1, %ymm2, %ymm3");
2719
+ asm volatile("vfmsub132ps %zmm1, %zmm2, %zmm3");
2720
+ asm volatile("vfmsub132ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2721
+
2722
+ asm volatile("vfmsub132pd %xmm1, %xmm2, %xmm3");
2723
+ asm volatile("vfmsub132pd %ymm1, %ymm2, %ymm3");
2724
+ asm volatile("vfmsub132pd %zmm1, %zmm2, %zmm3");
2725
+ asm volatile("vfmsub132pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2726
+
2727
+ asm volatile("v4fmaddps (%eax), %zmm0, %zmm4");
2728
+ asm volatile("v4fmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
2729
+
2730
+ /* AVX-512: Op code 0f 38 9b */
2731
+
2732
+ asm volatile("vfmsub132ss %xmm1, %xmm2, %xmm3");
2733
+ asm volatile("vfmsub132ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
2734
+
2735
+ asm volatile("vfmsub132sd %xmm1, %xmm2, %xmm3");
2736
+ asm volatile("vfmsub132sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
2737
+
2738
+ asm volatile("v4fmaddss (%eax), %xmm0, %xmm4");
2739
+ asm volatile("v4fmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4");
20142740
20152741 /* AVX-512: Op code 0f 38 a0 */
20162742
....@@ -2031,6 +2757,32 @@
20312757
20322758 asm volatile("vscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
20332759 asm volatile("vscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
2760
+
2761
+ /* AVX-512: Op code 0f 38 aa */
2762
+
2763
+ asm volatile("vfmsub213ps %xmm1, %xmm2, %xmm3");
2764
+ asm volatile("vfmsub213ps %ymm1, %ymm2, %ymm3");
2765
+ asm volatile("vfmsub213ps %zmm1, %zmm2, %zmm3");
2766
+ asm volatile("vfmsub213ps 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2767
+
2768
+ asm volatile("vfmsub213pd %xmm1, %xmm2, %xmm3");
2769
+ asm volatile("vfmsub213pd %ymm1, %ymm2, %ymm3");
2770
+ asm volatile("vfmsub213pd %zmm1, %zmm2, %zmm3");
2771
+ asm volatile("vfmsub213pd 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2772
+
2773
+ asm volatile("v4fnmaddps (%eax), %zmm0, %zmm4");
2774
+ asm volatile("v4fnmaddps 0x12345678(%eax,%ecx,8),%zmm0,%zmm4");
2775
+
2776
+ /* AVX-512: Op code 0f 38 ab */
2777
+
2778
+ asm volatile("vfmsub213ss %xmm1, %xmm2, %xmm3");
2779
+ asm volatile("vfmsub213ss 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
2780
+
2781
+ asm volatile("vfmsub213sd %xmm1, %xmm2, %xmm3");
2782
+ asm volatile("vfmsub213sd 0x12345678(%eax,%ecx,8),%xmm2,%xmm3");
2783
+
2784
+ asm volatile("v4fnmaddss (%eax), %xmm0, %xmm4");
2785
+ asm volatile("v4fnmaddss 0x12345678(%eax,%ecx,8),%xmm0,%xmm4");
20342786
20352787 /* AVX-512: Op code 0f 38 b4 */
20362788
....@@ -2069,6 +2821,44 @@
20692821
20702822 asm volatile("vrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}");
20712823 asm volatile("vrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}");
2824
+
2825
+ /* AVX-512: Op code 0f 38 cf */
2826
+
2827
+ asm volatile("gf2p8mulb %xmm1, %xmm3");
2828
+ asm volatile("gf2p8mulb 0x12345678(%eax,%ecx,8),%xmm3");
2829
+
2830
+ asm volatile("vgf2p8mulb %xmm1, %xmm2, %xmm3");
2831
+ asm volatile("vgf2p8mulb %ymm1, %ymm2, %ymm3");
2832
+ asm volatile("vgf2p8mulb %zmm1, %zmm2, %zmm3");
2833
+ asm volatile("vgf2p8mulb 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2834
+
2835
+ /* AVX-512: Op code 0f 38 dc */
2836
+
2837
+ asm volatile("vaesenc %xmm1, %xmm2, %xmm3");
2838
+ asm volatile("vaesenc %ymm1, %ymm2, %ymm3");
2839
+ asm volatile("vaesenc %zmm1, %zmm2, %zmm3");
2840
+ asm volatile("vaesenc 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2841
+
2842
+ /* AVX-512: Op code 0f 38 dd */
2843
+
2844
+ asm volatile("vaesenclast %xmm1, %xmm2, %xmm3");
2845
+ asm volatile("vaesenclast %ymm1, %ymm2, %ymm3");
2846
+ asm volatile("vaesenclast %zmm1, %zmm2, %zmm3");
2847
+ asm volatile("vaesenclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2848
+
2849
+ /* AVX-512: Op code 0f 38 de */
2850
+
2851
+ asm volatile("vaesdec %xmm1, %xmm2, %xmm3");
2852
+ asm volatile("vaesdec %ymm1, %ymm2, %ymm3");
2853
+ asm volatile("vaesdec %zmm1, %zmm2, %zmm3");
2854
+ asm volatile("vaesdec 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
2855
+
2856
+ /* AVX-512: Op code 0f 38 df */
2857
+
2858
+ asm volatile("vaesdeclast %xmm1, %xmm2, %xmm3");
2859
+ asm volatile("vaesdeclast %ymm1, %ymm2, %ymm3");
2860
+ asm volatile("vaesdeclast %zmm1, %zmm2, %zmm3");
2861
+ asm volatile("vaesdeclast 0x12345678(%eax,%ecx,8),%zmm2,%zmm3");
20722862
20732863 /* AVX-512: Op code 0f 3a 03 */
20742864
....@@ -2189,6 +2979,12 @@
21892979 asm volatile("vshufi32x4 $0x12,%zmm5,%zmm6,%zmm7");
21902980 asm volatile("vshufi64x2 $0x12,%zmm5,%zmm6,%zmm7");
21912981
2982
+ /* AVX-512: Op code 0f 3a 44 */
2983
+
2984
+ asm volatile("vpclmulqdq $0x12,%xmm1,%xmm2,%xmm3");
2985
+ asm volatile("vpclmulqdq $0x12,%ymm1,%ymm2,%ymm3");
2986
+ asm volatile("vpclmulqdq $0x12,%zmm1,%zmm2,%zmm3");
2987
+
21922988 /* AVX-512: Op code 0f 3a 50 */
21932989
21942990 asm volatile("vrangeps $0x12,%zmm5,%zmm6,%zmm7");
....@@ -2228,6 +3024,54 @@
22283024
22293025 asm volatile("vfpclassss $0x12,%xmm7,%k5");
22303026 asm volatile("vfpclasssd $0x12,%xmm7,%k5");
3027
+
3028
+ /* AVX-512: Op code 0f 3a 70 */
3029
+
3030
+ asm volatile("vpshldw $0x12,%xmm1,%xmm2,%xmm3");
3031
+ asm volatile("vpshldw $0x12,%ymm1,%ymm2,%ymm3");
3032
+ asm volatile("vpshldw $0x12,%zmm1,%zmm2,%zmm3");
3033
+
3034
+ /* AVX-512: Op code 0f 3a 71 */
3035
+
3036
+ asm volatile("vpshldd $0x12,%xmm1,%xmm2,%xmm3");
3037
+ asm volatile("vpshldd $0x12,%ymm1,%ymm2,%ymm3");
3038
+ asm volatile("vpshldd $0x12,%zmm1,%zmm2,%zmm3");
3039
+
3040
+ asm volatile("vpshldq $0x12,%xmm1,%xmm2,%xmm3");
3041
+ asm volatile("vpshldq $0x12,%ymm1,%ymm2,%ymm3");
3042
+ asm volatile("vpshldq $0x12,%zmm1,%zmm2,%zmm3");
3043
+
3044
+ /* AVX-512: Op code 0f 3a 72 */
3045
+
3046
+ asm volatile("vpshrdw $0x12,%xmm1,%xmm2,%xmm3");
3047
+ asm volatile("vpshrdw $0x12,%ymm1,%ymm2,%ymm3");
3048
+ asm volatile("vpshrdw $0x12,%zmm1,%zmm2,%zmm3");
3049
+
3050
+ /* AVX-512: Op code 0f 3a 73 */
3051
+
3052
+ asm volatile("vpshrdd $0x12,%xmm1,%xmm2,%xmm3");
3053
+ asm volatile("vpshrdd $0x12,%ymm1,%ymm2,%ymm3");
3054
+ asm volatile("vpshrdd $0x12,%zmm1,%zmm2,%zmm3");
3055
+
3056
+ asm volatile("vpshrdq $0x12,%xmm1,%xmm2,%xmm3");
3057
+ asm volatile("vpshrdq $0x12,%ymm1,%ymm2,%ymm3");
3058
+ asm volatile("vpshrdq $0x12,%zmm1,%zmm2,%zmm3");
3059
+
3060
+ /* AVX-512: Op code 0f 3a ce */
3061
+
3062
+ asm volatile("gf2p8affineqb $0x12,%xmm1,%xmm3");
3063
+
3064
+ asm volatile("vgf2p8affineqb $0x12,%xmm1,%xmm2,%xmm3");
3065
+ asm volatile("vgf2p8affineqb $0x12,%ymm1,%ymm2,%ymm3");
3066
+ asm volatile("vgf2p8affineqb $0x12,%zmm1,%zmm2,%zmm3");
3067
+
3068
+ /* AVX-512: Op code 0f 3a cf */
3069
+
3070
+ asm volatile("gf2p8affineinvqb $0x12,%xmm1,%xmm3");
3071
+
3072
+ asm volatile("vgf2p8affineinvqb $0x12,%xmm1,%xmm2,%xmm3");
3073
+ asm volatile("vgf2p8affineinvqb $0x12,%ymm1,%ymm2,%ymm3");
3074
+ asm volatile("vgf2p8affineinvqb $0x12,%zmm1,%zmm2,%zmm3");
22313075
22323076 /* AVX-512: Op code 0f 72 (Grp13) */
22333077
....@@ -2656,6 +3500,12 @@
26563500 asm volatile("xsaveopt (%eax)");
26573501 asm volatile("mfence");
26583502
3503
+ /* cldemote m8 */
3504
+
3505
+ asm volatile("cldemote (%eax)");
3506
+ asm volatile("cldemote (0x12345678)");
3507
+ asm volatile("cldemote 0x12345678(%eax,%ecx,8)");
3508
+
26593509 /* xsavec mem */
26603510
26613511 asm volatile("xsavec (%eax)");
....@@ -2684,8 +3534,158 @@
26843534 asm volatile("ptwritel (0x12345678)");
26853535 asm volatile("ptwritel 0x12345678(%eax,%ecx,8)");
26863536
3537
+ /* tpause */
3538
+
3539
+ asm volatile("tpause %ebx");
3540
+
3541
+ /* umonitor */
3542
+
3543
+ asm volatile("umonitor %ax");
3544
+ asm volatile("umonitor %eax");
3545
+
3546
+ /* umwait */
3547
+
3548
+ asm volatile("umwait %eax");
3549
+
3550
+ /* movdiri */
3551
+
3552
+ asm volatile("movdiri %eax,(%ebx)");
3553
+ asm volatile("movdiri %ecx,0x12345678(%eax)");
3554
+
3555
+ /* movdir64b */
3556
+
3557
+ asm volatile("movdir64b (%eax),%ebx");
3558
+ asm volatile("movdir64b 0x12345678(%eax),%ecx");
3559
+ asm volatile("movdir64b (%si),%bx");
3560
+ asm volatile("movdir64b 0x1234(%si),%cx");
3561
+
3562
+ /* enqcmd */
3563
+
3564
+ asm volatile("enqcmd (%eax),%ebx");
3565
+ asm volatile("enqcmd 0x12345678(%eax),%ecx");
3566
+ asm volatile("enqcmd (%si),%bx");
3567
+ asm volatile("enqcmd 0x1234(%si),%cx");
3568
+
3569
+ /* enqcmds */
3570
+
3571
+ asm volatile("enqcmds (%eax),%ebx");
3572
+ asm volatile("enqcmds 0x12345678(%eax),%ecx");
3573
+ asm volatile("enqcmds (%si),%bx");
3574
+ asm volatile("enqcmds 0x1234(%si),%cx");
3575
+
3576
+ /* incsspd */
3577
+
3578
+ asm volatile("incsspd %eax");
3579
+ /* Also check instructions in the same group encoding as incsspd */
3580
+ asm volatile("xrstor (%eax)");
3581
+ asm volatile("xrstor (0x12345678)");
3582
+ asm volatile("xrstor 0x12345678(%eax,%ecx,8)");
3583
+ asm volatile("lfence");
3584
+
3585
+ /* rdsspd */
3586
+
3587
+ asm volatile("rdsspd %eax");
3588
+
3589
+ /* saveprevssp */
3590
+
3591
+ asm volatile("saveprevssp");
3592
+
3593
+ /* rstorssp */
3594
+
3595
+ asm volatile("rstorssp (%eax)");
3596
+ asm volatile("rstorssp (0x12345678)");
3597
+ asm volatile("rstorssp 0x12345678(%eax,%ecx,8)");
3598
+
3599
+ /* wrssd */
3600
+
3601
+ asm volatile("wrssd %ecx,(%eax)");
3602
+ asm volatile("wrssd %edx,(0x12345678)");
3603
+ asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)");
3604
+
3605
+ /* wrussd */
3606
+
3607
+ asm volatile("wrussd %ecx,(%eax)");
3608
+ asm volatile("wrussd %edx,(0x12345678)");
3609
+ asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)");
3610
+
3611
+ /* setssbsy */
3612
+
3613
+ asm volatile("setssbsy");
3614
+ /* Also check instructions in the same group encoding as setssbsy */
3615
+ asm volatile("rdpkru");
3616
+ asm volatile("wrpkru");
3617
+
3618
+ /* clrssbsy */
3619
+
3620
+ asm volatile("clrssbsy (%eax)");
3621
+ asm volatile("clrssbsy (0x12345678)");
3622
+ asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)");
3623
+
3624
+ /* endbr32/64 */
3625
+
3626
+ asm volatile("endbr32");
3627
+ asm volatile("endbr64");
3628
+
3629
+ /* call with/without notrack prefix */
3630
+
3631
+ asm volatile("call *%eax"); /* Expecting: call indirect 0 */
3632
+ asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */
3633
+ asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */
3634
+ asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
3635
+
3636
+ asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */
3637
+ asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */
3638
+ asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */
3639
+ asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
3640
+
3641
+ asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */
3642
+ asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */
3643
+ asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */
3644
+ asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
3645
+
3646
+ asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */
3647
+ asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */
3648
+ asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */
3649
+ asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
3650
+
3651
+ /* jmp with/without notrack prefix */
3652
+
3653
+ asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */
3654
+ asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */
3655
+ asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
3656
+ asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
3657
+
3658
+ asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */
3659
+ asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */
3660
+ asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
3661
+ asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
3662
+
3663
+ asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */
3664
+ asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */
3665
+ asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
3666
+ asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
3667
+
3668
+ asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */
3669
+ asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */
3670
+ asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
3671
+ asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
3672
+
26873673 #endif /* #ifndef __x86_64__ */
26883674
3675
+ /* SGX */
3676
+
3677
+ asm volatile("encls");
3678
+ asm volatile("enclu");
3679
+ asm volatile("enclv");
3680
+
3681
+ /* pconfig */
3682
+
3683
+ asm volatile("pconfig");
3684
+
3685
+ /* wbnoinvd */
3686
+
3687
+ asm volatile("wbnoinvd");
3688
+
26893689 /* Following line is a marker for the awk script - do not change */
26903690 asm volatile("rdtsc"); /* Stop here */
26913691