Skip to content

Commit 8824913

Browse files
committed
[X86][AVX512] Add second test case for PR32547
Add a test demonstrating the missing OR(X,KSHIFTL(Y,8)) -> KUNPCKBW fold, alongside the existing OR(KSHIFTL(X,8),Y) -> KUNPCKBW test.
1 parent acb8922 commit 8824913

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-v1.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,7 @@ define i64 @shuf64i1_zero(i64 %a) {
882882
ret i64 %d
883883
}
884884

885+
; OR(KSHIFTL(X,8),Y) -> KUNPCKBW
885886
define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) {
886887
; AVX512F-LABEL: PR32547:
887888
; AVX512F: # %bb.0: # %entry
@@ -933,5 +934,58 @@ define void @PR32547(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float>
933934
tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4
934935
ret void
935936
}
937+
938+
; OR(X, KSHIFTL(Y,8)) -> KUNPCKBW
939+
; Companion to @PR32547 with the operands of the `or` in the other order: here
; the zero-extended (unshifted) mask is the first operand and the mask shifted
; left by 8 is the second.
; The CHECK lines below record the current output for each prefix: the two
; compare masks are merged with kshiftlw + korw (AVX512F additionally clears
; the upper byte of %k1 with a kshiftlw/kshiftrw pair); no kunpckbw is emitted.
define void @PR32547_swap(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x float> %d, float* %p) {
940+
; AVX512F-LABEL: PR32547_swap:
941+
; AVX512F: # %bb.0: # %entry
942+
; AVX512F-NEXT: # kill: def $ymm3 killed $ymm3 def $zmm3
943+
; AVX512F-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2
944+
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
945+
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
946+
; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k0
947+
; AVX512F-NEXT: vcmpltps %zmm3, %zmm2, %k1
948+
; AVX512F-NEXT: kshiftlw $8, %k0, %k0
949+
; AVX512F-NEXT: kshiftlw $8, %k1, %k1
950+
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
951+
; AVX512F-NEXT: korw %k0, %k1, %k1
952+
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
953+
; AVX512F-NEXT: vmovaps %zmm0, (%rdi) {%k1}
954+
; AVX512F-NEXT: vzeroupper
955+
; AVX512F-NEXT: retq
956+
;
957+
; AVX512VL-LABEL: PR32547_swap:
958+
; AVX512VL: # %bb.0: # %entry
959+
; AVX512VL-NEXT: vcmpltps %ymm1, %ymm0, %k0
960+
; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
961+
; AVX512VL-NEXT: kshiftlw $8, %k0, %k0
962+
; AVX512VL-NEXT: korw %k0, %k1, %k1
963+
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
964+
; AVX512VL-NEXT: vmovaps %zmm0, (%rdi) {%k1}
965+
; AVX512VL-NEXT: vzeroupper
966+
; AVX512VL-NEXT: retq
967+
;
968+
; VL_BW_DQ-LABEL: PR32547_swap:
969+
; VL_BW_DQ: # %bb.0: # %entry
970+
; VL_BW_DQ-NEXT: vcmpltps %ymm1, %ymm0, %k0
971+
; VL_BW_DQ-NEXT: vcmpltps %ymm3, %ymm2, %k1
972+
; VL_BW_DQ-NEXT: kshiftlw $8, %k0, %k0
973+
; VL_BW_DQ-NEXT: korw %k0, %k1, %k1
974+
; VL_BW_DQ-NEXT: vxorps %xmm0, %xmm0, %xmm0
975+
; VL_BW_DQ-NEXT: vmovaps %zmm0, (%rdi) {%k1}
976+
; VL_BW_DQ-NEXT: vzeroupper
977+
; VL_BW_DQ-NEXT: retq
978+
entry:
979+
; Two 8-lane float compares, each returning an i8 mask: %0 compares %a with %b,
; %1 compares %c with %d. Both pass predicate operand 1 (shown as vcmpltps in
; the CHECK lines above) and an all-ones i8 mask operand.
%0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 1, i8 -1)
980+
%1 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %c, <8 x float> %d, i32 1, i8 -1)
981+
; Widen both i8 masks to i16 so they can be combined into one 16-bit mask.
%conv.i = zext i8 %0 to i16
982+
%conv.i18 = zext i8 %1 to i16
983+
; Place the %a/%b mask in bits 8-15; the %c/%d mask stays in bits 0-7.
%shl = shl nuw i16 %conv.i, 8
984+
; The point of this test: the unshifted value is the FIRST operand of the or,
; the shifted value the second.
%or = or i16 %conv.i18, %shl
985+
%2 = bitcast float* %p to <16 x float>*
986+
; Reinterpret the combined i16 as a 16-lane i1 predicate and use it to mask a
; store of zeroinitializer through %p (alignment operand 64).
%3 = bitcast i16 %or to <16 x i1>
987+
tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> zeroinitializer, <16 x float>* %2, i32 64, <16 x i1> %3) #4
988+
ret void
989+
}
936990
; Intrinsic declarations used by the PR32547 tests: the 8 x float compare that
; returns an i8 mask, and the 16 x float masked store.
declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)
937991
declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)

0 commit comments

Comments
 (0)