@@ -12867,46 +12867,25 @@ define void @mask_replication_factor8_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
12867
12867
; AVX512DQ-NEXT: vzeroupper
12868
12868
; AVX512DQ-NEXT: retq
12869
12869
;
12870
- ; AVX512BW-ONLY-LABEL: mask_replication_factor8_vf8:
12871
- ; AVX512BW-ONLY: # %bb.0:
12872
- ; AVX512BW-ONLY-NEXT: kmovq (%rdi), %k0
12873
- ; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
12874
- ; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
12875
- ; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55]
12876
- ; AVX512BW-ONLY-NEXT: vpmovb2m %zmm0, %k1
12877
- ; AVX512BW-ONLY-NEXT: kshiftrq $16, %k1, %k2
12878
- ; AVX512BW-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
12879
- ; AVX512BW-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
12880
- ; AVX512BW-ONLY-NEXT: kshiftrq $48, %k1, %k2
12881
- ; AVX512BW-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
12882
- ; AVX512BW-ONLY-NEXT: kshiftrq $32, %k1, %k1
12883
- ; AVX512BW-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
12884
- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
12885
- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
12886
- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
12887
- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
12888
- ; AVX512BW-ONLY-NEXT: vzeroupper
12889
- ; AVX512BW-ONLY-NEXT: retq
12890
- ;
12891
- ; AVX512VBMI-ONLY-LABEL: mask_replication_factor8_vf8:
12892
- ; AVX512VBMI-ONLY: # %bb.0:
12893
- ; AVX512VBMI-ONLY-NEXT: kmovq (%rdi), %k0
12894
- ; AVX512VBMI-ONLY-NEXT: vpmovm2b %k0, %zmm0
12895
- ; AVX512VBMI-ONLY-NEXT: vpmovsxbq %xmm0, %zmm0
12896
- ; AVX512VBMI-ONLY-NEXT: vpmovb2m %zmm0, %k1
12897
- ; AVX512VBMI-ONLY-NEXT: kshiftrq $16, %k1, %k2
12898
- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
12899
- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
12900
- ; AVX512VBMI-ONLY-NEXT: kshiftrq $48, %k1, %k2
12901
- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
12902
- ; AVX512VBMI-ONLY-NEXT: kshiftrq $32, %k1, %k1
12903
- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
12904
- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
12905
- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
12906
- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
12907
- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
12908
- ; AVX512VBMI-ONLY-NEXT: vzeroupper
12909
- ; AVX512VBMI-ONLY-NEXT: retq
12870
+ ; AVX512BW-LABEL: mask_replication_factor8_vf8:
12871
+ ; AVX512BW: # %bb.0:
12872
+ ; AVX512BW-NEXT: kmovq (%rdi), %k0
12873
+ ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
12874
+ ; AVX512BW-NEXT: vpmovsxbq %xmm0, %zmm0
12875
+ ; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
12876
+ ; AVX512BW-NEXT: kshiftrq $16, %k1, %k2
12877
+ ; AVX512BW-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
12878
+ ; AVX512BW-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
12879
+ ; AVX512BW-NEXT: kshiftrq $48, %k1, %k2
12880
+ ; AVX512BW-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
12881
+ ; AVX512BW-NEXT: kshiftrq $32, %k1, %k1
12882
+ ; AVX512BW-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
12883
+ ; AVX512BW-NEXT: vmovdqa64 %zmm3, 128(%rdx)
12884
+ ; AVX512BW-NEXT: vmovdqa64 %zmm2, 192(%rdx)
12885
+ ; AVX512BW-NEXT: vmovdqa64 %zmm1, (%rdx)
12886
+ ; AVX512BW-NEXT: vmovdqa64 %zmm0, 64(%rdx)
12887
+ ; AVX512BW-NEXT: vzeroupper
12888
+ ; AVX512BW-NEXT: retq
12910
12889
%src.mask.padded = load <64 x i1>, ptr %in.maskvec, align 64
12911
12890
%src.mask = shufflevector <64 x i1> %src.mask.padded, <64 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
12912
12891
%tgt.mask = shufflevector <8 x i1> %src.mask, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
0 commit comments