@@ -83,6 +83,27 @@ entry:
83
83
ret <32 x half > %3
84
84
}
85
85
86
+ define dso_local <32 x half > @test6 (<16 x i32 > %a , <16 x float > %b ) local_unnamed_addr #0 { ; two vfmul.cph calls sharing a sign-bit-flipped %a; their sum is then added to %b
87
+ ; CHECK-LABEL: test6:
88
+ ; CHECK: # %bb.0: # %entry
89
+ ; CHECK-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
90
+ ; CHECK-NEXT: vfcmulcph %zmm0, %zmm1, %zmm3
91
+ ; CHECK-NEXT: vfcmaddcph %zmm0, %zmm2, %zmm3
92
+ ; CHECK-NEXT: vaddph %zmm1, %zmm3, %zmm0
93
+ ; CHECK-NEXT: retq
94
+ entry:
95
+ %0 = xor <16 x i32 > %a , splat (i32 -2147483648 ) ; flip bit 31 (0x80000000) of every i32 lane; NOTE(review): presumably conjugates each complex-half pair - confirm
96
+ %1 = bitcast <16 x i32 > %0 to <16 x float > ; reinterpret as the <16 x float> type the intrinsic takes
97
+ %2 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float > splat (float 1.000000e+00 ), <16 x float > %1 , <16 x float > zeroinitializer , i16 -1 , i32 4 ) ; flipped %a is the 2nd operand here; mask -1 has all bits set
98
+ %3 = bitcast <16 x float > %2 to <32 x half > ; view the product as 32 halves for the fadd below
99
+ %4 = tail call <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float > %1 , <16 x float > %b , <16 x float > zeroinitializer , i16 -1 , i32 4 ) ; flipped %a is the 1st operand here, %b the 2nd
100
+ %5 = bitcast <16 x float > %4 to <32 x half >
101
+ %6 = fadd <32 x half > %3 , %5 ; sum of the two products
102
+ %7 = bitcast <16 x float > %b to <32 x half > ; %b viewed as halves for the final add
103
+ %8 = fadd <32 x half > %6 , %7 ; the expected output above ends with a plain vaddph of %zmm1
104
+ ret <32 x half > %8
105
+ }
106
+
86
107
declare <16 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.512 (<16 x float >, <16 x float >, <16 x float >, i16 , i32 immarg)
87
108
declare <8 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.256 (<8 x float >, <8 x float >, <8 x float >, i8 )
88
109
declare <4 x float > @llvm.x86.avx512fp16.mask.vfmul.cph.128 (<4 x float >, <4 x float >, <4 x float >, i8 )
0 commit comments