@@ -593,13 +593,17 @@ define void @load_v2i1_broadcast_1_v1i1_store(<2 x i1>* %a0,<1 x i1>* %a1) {
593
593
; AVX512: # %bb.0:
594
594
; AVX512-NEXT: kmovb (%rdi), %k0
595
595
; AVX512-NEXT: kshiftrb $1, %k0, %k0
596
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
597
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
596
598
; AVX512-NEXT: kmovb %k0, (%rsi)
597
599
; AVX512-NEXT: retq
598
600
;
599
601
; AVX512NOTDQ-LABEL: load_v2i1_broadcast_1_v1i1_store:
600
602
; AVX512NOTDQ: # %bb.0:
601
603
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
602
604
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
605
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
606
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
603
607
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
604
608
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
605
609
; AVX512NOTDQ-NEXT: retq
@@ -619,6 +623,8 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
619
623
; AVX512-NEXT: cmovel %ecx, %eax
620
624
; AVX512-NEXT: kmovd %eax, %k0
621
625
; AVX512-NEXT: kshiftrb $1, %k0, %k0
626
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
627
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
622
628
; AVX512-NEXT: kmovb %k0, (%rsi)
623
629
; AVX512-NEXT: retq
624
630
;
@@ -632,6 +638,8 @@ define void @load_v3i1_broadcast_1_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
632
638
; AVX512NOTDQ-NEXT: cmovel %ecx, %eax
633
639
; AVX512NOTDQ-NEXT: kmovd %eax, %k0
634
640
; AVX512NOTDQ-NEXT: kshiftrw $1, %k0, %k0
641
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
642
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
635
643
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
636
644
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
637
645
; AVX512NOTDQ-NEXT: retq
@@ -649,6 +657,8 @@ define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
649
657
; AVX512-NEXT: cmovel %eax, %ecx
650
658
; AVX512-NEXT: kmovd %ecx, %k0
651
659
; AVX512-NEXT: kshiftrb $2, %k0, %k0
660
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
661
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
652
662
; AVX512-NEXT: kmovb %k0, (%rsi)
653
663
; AVX512-NEXT: retq
654
664
;
@@ -660,6 +670,8 @@ define void @load_v3i1_broadcast_2_v1i1_store(<3 x i1>* %a0,<1 x i1>* %a1) {
660
670
; AVX512NOTDQ-NEXT: cmovel %eax, %ecx
661
671
; AVX512NOTDQ-NEXT: kmovd %ecx, %k0
662
672
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
673
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
674
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
663
675
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
664
676
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
665
677
; AVX512NOTDQ-NEXT: retq
@@ -673,13 +685,17 @@ define void @load_v4i1_broadcast_2_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
673
685
; AVX512: # %bb.0:
674
686
; AVX512-NEXT: kmovb (%rdi), %k0
675
687
; AVX512-NEXT: kshiftrb $2, %k0, %k0
688
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
689
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
676
690
; AVX512-NEXT: kmovb %k0, (%rsi)
677
691
; AVX512-NEXT: retq
678
692
;
679
693
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_2_v1i1_store:
680
694
; AVX512NOTDQ: # %bb.0:
681
695
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
682
696
; AVX512NOTDQ-NEXT: kshiftrw $2, %k0, %k0
697
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
698
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
683
699
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
684
700
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
685
701
; AVX512NOTDQ-NEXT: retq
@@ -693,13 +709,17 @@ define void @load_v4i1_broadcast_3_v1i1_store(<4 x i1>* %a0,<1 x i1>* %a1) {
693
709
; AVX512: # %bb.0:
694
710
; AVX512-NEXT: kmovb (%rdi), %k0
695
711
; AVX512-NEXT: kshiftrb $3, %k0, %k0
712
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
713
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
696
714
; AVX512-NEXT: kmovb %k0, (%rsi)
697
715
; AVX512-NEXT: retq
698
716
;
699
717
; AVX512NOTDQ-LABEL: load_v4i1_broadcast_3_v1i1_store:
700
718
; AVX512NOTDQ: # %bb.0:
701
719
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
702
720
; AVX512NOTDQ-NEXT: kshiftrw $3, %k0, %k0
721
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
722
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
703
723
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
704
724
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
705
725
; AVX512NOTDQ-NEXT: retq
@@ -713,13 +733,17 @@ define void @load_v8i1_broadcast_4_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
713
733
; AVX512: # %bb.0:
714
734
; AVX512-NEXT: kmovb (%rdi), %k0
715
735
; AVX512-NEXT: kshiftrb $4, %k0, %k0
736
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
737
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
716
738
; AVX512-NEXT: kmovb %k0, (%rsi)
717
739
; AVX512-NEXT: retq
718
740
;
719
741
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_4_v1i1_store:
720
742
; AVX512NOTDQ: # %bb.0:
721
743
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
722
744
; AVX512NOTDQ-NEXT: kshiftrw $4, %k0, %k0
745
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
746
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
723
747
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
724
748
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
725
749
; AVX512NOTDQ-NEXT: retq
@@ -760,13 +784,17 @@ define void @load_v8i1_broadcast_7_v1i1_store(<8 x i1>* %a0,<1 x i1>* %a1) {
760
784
; AVX512: # %bb.0:
761
785
; AVX512-NEXT: kmovb (%rdi), %k0
762
786
; AVX512-NEXT: kshiftrb $7, %k0, %k0
787
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
788
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
763
789
; AVX512-NEXT: kmovb %k0, (%rsi)
764
790
; AVX512-NEXT: retq
765
791
;
766
792
; AVX512NOTDQ-LABEL: load_v8i1_broadcast_7_v1i1_store:
767
793
; AVX512NOTDQ: # %bb.0:
768
794
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
769
795
; AVX512NOTDQ-NEXT: kshiftrw $7, %k0, %k0
796
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
797
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
770
798
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
771
799
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
772
800
; AVX512NOTDQ-NEXT: retq
@@ -807,13 +835,17 @@ define void @load_v16i1_broadcast_8_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
807
835
; AVX512: # %bb.0:
808
836
; AVX512-NEXT: kmovw (%rdi), %k0
809
837
; AVX512-NEXT: kshiftrw $8, %k0, %k0
838
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
839
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
810
840
; AVX512-NEXT: kmovb %k0, (%rsi)
811
841
; AVX512-NEXT: retq
812
842
;
813
843
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_8_v1i1_store:
814
844
; AVX512NOTDQ: # %bb.0:
815
845
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
816
846
; AVX512NOTDQ-NEXT: kshiftrw $8, %k0, %k0
847
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
848
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
817
849
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
818
850
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
819
851
; AVX512NOTDQ-NEXT: retq
@@ -881,13 +913,17 @@ define void @load_v16i1_broadcast_15_v1i1_store(<16 x i1>* %a0,<1 x i1>* %a1) {
881
913
; AVX512: # %bb.0:
882
914
; AVX512-NEXT: kmovw (%rdi), %k0
883
915
; AVX512-NEXT: kshiftrw $15, %k0, %k0
916
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
917
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
884
918
; AVX512-NEXT: kmovb %k0, (%rsi)
885
919
; AVX512-NEXT: retq
886
920
;
887
921
; AVX512NOTDQ-LABEL: load_v16i1_broadcast_15_v1i1_store:
888
922
; AVX512NOTDQ: # %bb.0:
889
923
; AVX512NOTDQ-NEXT: kmovw (%rdi), %k0
890
924
; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
925
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
926
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
891
927
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
892
928
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
893
929
; AVX512NOTDQ-NEXT: retq
@@ -955,13 +991,17 @@ define void @load_v32i1_broadcast_16_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
955
991
; AVX512: # %bb.0:
956
992
; AVX512-NEXT: kmovd (%rdi), %k0
957
993
; AVX512-NEXT: kshiftrd $16, %k0, %k0
994
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
995
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
958
996
; AVX512-NEXT: kmovb %k0, (%rsi)
959
997
; AVX512-NEXT: retq
960
998
;
961
999
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_16_v1i1_store:
962
1000
; AVX512NOTDQ: # %bb.0:
963
1001
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
964
1002
; AVX512NOTDQ-NEXT: kshiftrd $16, %k0, %k0
1003
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1004
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
965
1005
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
966
1006
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
967
1007
; AVX512NOTDQ-NEXT: retq
@@ -1056,13 +1096,17 @@ define void @load_v32i1_broadcast_31_v1i1_store(<32 x i1>* %a0,<1 x i1>* %a1) {
1056
1096
; AVX512: # %bb.0:
1057
1097
; AVX512-NEXT: kmovd (%rdi), %k0
1058
1098
; AVX512-NEXT: kshiftrd $31, %k0, %k0
1099
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
1100
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
1059
1101
; AVX512-NEXT: kmovb %k0, (%rsi)
1060
1102
; AVX512-NEXT: retq
1061
1103
;
1062
1104
; AVX512NOTDQ-LABEL: load_v32i1_broadcast_31_v1i1_store:
1063
1105
; AVX512NOTDQ: # %bb.0:
1064
1106
; AVX512NOTDQ-NEXT: kmovd (%rdi), %k0
1065
1107
; AVX512NOTDQ-NEXT: kshiftrd $31, %k0, %k0
1108
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1109
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
1066
1110
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1067
1111
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1068
1112
; AVX512NOTDQ-NEXT: retq
@@ -1160,13 +1204,17 @@ define void @load_v64i1_broadcast_32_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
1160
1204
; AVX512: # %bb.0:
1161
1205
; AVX512-NEXT: kmovq (%rdi), %k0
1162
1206
; AVX512-NEXT: kshiftrq $32, %k0, %k0
1207
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
1208
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
1163
1209
; AVX512-NEXT: kmovb %k0, (%rsi)
1164
1210
; AVX512-NEXT: retq
1165
1211
;
1166
1212
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_32_v1i1_store:
1167
1213
; AVX512NOTDQ: # %bb.0:
1168
1214
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
1169
1215
; AVX512NOTDQ-NEXT: kshiftrq $32, %k0, %k0
1216
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1217
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
1170
1218
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1171
1219
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1172
1220
; AVX512NOTDQ-NEXT: retq
@@ -1286,13 +1334,17 @@ define void @load_v64i1_broadcast_63_v1i1_store(<64 x i1>* %a0,<1 x i1>* %a1) {
1286
1334
; AVX512: # %bb.0:
1287
1335
; AVX512-NEXT: kmovq (%rdi), %k0
1288
1336
; AVX512-NEXT: kshiftrq $63, %k0, %k0
1337
+ ; AVX512-NEXT: kshiftlb $7, %k0, %k0
1338
+ ; AVX512-NEXT: kshiftrb $7, %k0, %k0
1289
1339
; AVX512-NEXT: kmovb %k0, (%rsi)
1290
1340
; AVX512-NEXT: retq
1291
1341
;
1292
1342
; AVX512NOTDQ-LABEL: load_v64i1_broadcast_63_v1i1_store:
1293
1343
; AVX512NOTDQ: # %bb.0:
1294
1344
; AVX512NOTDQ-NEXT: kmovq (%rdi), %k0
1295
1345
; AVX512NOTDQ-NEXT: kshiftrq $63, %k0, %k0
1346
+ ; AVX512NOTDQ-NEXT: kshiftlw $15, %k0, %k0
1347
+ ; AVX512NOTDQ-NEXT: kshiftrw $15, %k0, %k0
1296
1348
; AVX512NOTDQ-NEXT: kmovd %k0, %eax
1297
1349
; AVX512NOTDQ-NEXT: movb %al, (%rsi)
1298
1350
; AVX512NOTDQ-NEXT: retq
0 commit comments