Skip to content

Commit b1a48af

Browse files
authored
[DAG] SimplifyDemandedVectorElts - add handling for INT<->FP conversions (#117884)
1 parent 154c7c0 commit b1a48af

File tree

8 files changed (+78 -80 lines changed)

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3736,6 +3736,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
37363736
KnownUndef.clearAllBits();
37373737
}
37383738
break;
3739+
case ISD::SINT_TO_FP:
3740+
case ISD::UINT_TO_FP:
3741+
case ISD::FP_TO_SINT:
3742+
case ISD::FP_TO_UINT:
3743+
if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3744+
KnownZero, TLO, Depth + 1))
3745+
return true;
3746+
// Don't fall through to generic undef -> undef handling.
3747+
return false;
37393748
default: {
37403749
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
37413750
if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,

llvm/test/CodeGen/PowerPC/pr38087.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,9 @@ declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0
1111
define void @draw_llvm_vs_variant0(<4 x float> %x) {
1212
; CHECK-LABEL: draw_llvm_vs_variant0:
1313
; CHECK: # %bb.0: # %entry
14-
; CHECK-NEXT: lxsd v3, 0(r3)
15-
; CHECK-NEXT: vmrghh v3, v3, v3
14+
; CHECK-NEXT: lxsihzx v3, 0, r3
1615
; CHECK-NEXT: vextsh2w v3, v3
16+
; CHECK-NEXT: xxmrghw v3, v3, v3
1717
; CHECK-NEXT: xvcvsxwsp vs0, v3
1818
; CHECK-NEXT: xxspltw vs0, vs0, 2
1919
; CHECK-NEXT: xvmaddasp vs0, v2, v2

llvm/test/CodeGen/X86/avx10_2fptosi_satcvtds.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,7 @@ define i64 @test_signed_i64_f64(double %f) nounwind {
8585
; X86-NEXT: pushl %edi
8686
; X86-NEXT: pushl %esi
8787
; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
88-
; X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
89-
; X86-NEXT: vcvttpd2qq %xmm1, %xmm1
88+
; X86-NEXT: vcvttpd2qq %xmm0, %xmm1
9089
; X86-NEXT: vmovd %xmm1, %esi
9190
; X86-NEXT: xorl %ecx, %ecx
9291
; X86-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 56 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -141,56 +141,61 @@ declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
141141
define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
142142
; CHECK-SSE-LABEL: fmul_pow2_8xhalf:
143143
; CHECK-SSE: # %bb.0:
144-
; CHECK-SSE-NEXT: subq $88, %rsp
145-
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 96
144+
; CHECK-SSE-NEXT: subq $104, %rsp
145+
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 112
146146
; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
147147
; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
148148
; CHECK-SSE-NEXT: pslld $23, %xmm1
149149
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1065353216,1065353216,1065353216,1065353216]
150150
; CHECK-SSE-NEXT: paddd %xmm2, %xmm1
151151
; CHECK-SSE-NEXT: cvttps2dq %xmm1, %xmm1
152-
; CHECK-SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
152+
; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
153+
; CHECK-SSE-NEXT: pslld $16, %xmm1
154+
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
153155
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
154156
; CHECK-SSE-NEXT: pslld $23, %xmm0
155157
; CHECK-SSE-NEXT: paddd %xmm2, %xmm0
156158
; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0
159+
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
157160
; CHECK-SSE-NEXT: pslld $16, %xmm0
158-
; CHECK-SSE-NEXT: psrld $16, %xmm0
159161
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
160-
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
162+
; CHECK-SSE-NEXT: psrld $16, %xmm0
161163
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
162164
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
163165
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
164-
; CHECK-SSE-NEXT: cvtdq2ps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
166+
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
167+
; CHECK-SSE-NEXT: psrlq $48, %xmm0
168+
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
165169
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
166170
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
167-
; CHECK-SSE-NEXT: pshufd $238, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
168-
; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3]
171+
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
172+
; CHECK-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
169173
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
170174
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
171175
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
172-
; CHECK-SSE-NEXT: pshufd $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
173-
; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3]
174-
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
176+
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
177+
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
178+
; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
179+
; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0
175180
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
176181
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
177182
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
178-
; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
179-
; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
180-
; CHECK-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
183+
; CHECK-SSE-NEXT: psrld $16, %xmm0
181184
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
182185
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
183186
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
184-
; CHECK-SSE-NEXT: cvtdq2ps (%rsp), %xmm0 # 16-byte Folded Reload
187+
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
188+
; CHECK-SSE-NEXT: psrlq $48, %xmm0
189+
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
185190
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
186191
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
187-
; CHECK-SSE-NEXT: pshufd $238, (%rsp), %xmm0 # 16-byte Folded Reload
188-
; CHECK-SSE-NEXT: # xmm0 = mem[2,3,2,3]
192+
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
193+
; CHECK-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
189194
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
190195
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
191196
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
192-
; CHECK-SSE-NEXT: pshufd $255, (%rsp), %xmm0 # 16-byte Folded Reload
193-
; CHECK-SSE-NEXT: # xmm0 = mem[3,3,3,3]
197+
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
198+
; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
194199
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
195200
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
196201
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
@@ -202,39 +207,39 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
202207
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
203208
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
204209
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
205-
; CHECK-SSE-NEXT: punpcklwd (%rsp), %xmm0 # 16-byte Folded Reload
206-
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
207-
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
210+
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
211+
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
212+
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
208213
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
209214
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
210215
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
211216
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
212217
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
213-
; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
218+
; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
214219
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
215220
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
216221
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
217222
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
218223
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
219-
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
220-
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
221-
; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
222-
; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
223-
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
224+
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
225+
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
226+
; CHECK-SSE-NEXT: punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
227+
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
228+
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
224229
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
225230
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
226231
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
227232
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
228233
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
229-
; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
234+
; CHECK-SSE-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
230235
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
231236
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
232237
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
233238
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
234239
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
235-
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
236-
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
237-
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
240+
; CHECK-SSE-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
241+
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
242+
; CHECK-SSE-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
238243
; CHECK-SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
239244
; CHECK-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero
240245
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
@@ -246,14 +251,13 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
246251
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
247252
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
248253
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
249-
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
250-
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
251-
; CHECK-SSE-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
252-
; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
253-
; CHECK-SSE-NEXT: punpcklqdq (%rsp), %xmm1 # 16-byte Folded Reload
254-
; CHECK-SSE-NEXT: # xmm1 = xmm1[0],mem[0]
255-
; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
256-
; CHECK-SSE-NEXT: addq $88, %rsp
254+
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
255+
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
256+
; CHECK-SSE-NEXT: punpckldq (%rsp), %xmm0 # 16-byte Folded Reload
257+
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
258+
; CHECK-SSE-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
259+
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0]
260+
; CHECK-SSE-NEXT: addq $104, %rsp
257261
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
258262
; CHECK-SSE-NEXT: retq
259263
;
@@ -1028,17 +1032,17 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
10281032
; CHECK-SSE-NEXT: pslld $23, %xmm0
10291033
; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10301034
; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0
1031-
; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
1032-
; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,2,u,u,u,u,u,u]
1033-
; CHECK-SSE-NEXT: pxor %xmm0, %xmm0
1034-
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1035-
; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1036-
; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0
1035+
; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
1036+
; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,2,u,u,u,u,u,u]
1037+
; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
1038+
; CHECK-SSE-NEXT: psrld $16, %xmm0
1039+
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
10371040
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
10381041
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
1039-
; CHECK-SSE-NEXT: pshufd $85, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1040-
; CHECK-SSE-NEXT: # xmm0 = mem[1,1,1,1]
1041-
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
1042+
; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
1043+
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1044+
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1045+
; CHECK-SSE-NEXT: cvtdq2ps %xmm1, %xmm0
10421046
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
10431047
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
10441048
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -1049,8 +1053,9 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
10491053
; CHECK-SSE-NEXT: callq __extendhfsf2@PLT
10501054
; CHECK-SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
10511055
; CHECK-SSE-NEXT: callq __truncsfhf2@PLT
1052-
; CHECK-SSE-NEXT: punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
1053-
; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
1056+
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
1057+
; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1058+
; CHECK-SSE-NEXT: movdqa %xmm1, %xmm0
10541059
; CHECK-SSE-NEXT: addq $40, %rsp
10551060
; CHECK-SSE-NEXT: retq
10561061
;

llvm/test/CodeGen/X86/fpclamptosat_vec.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -731,7 +731,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) nounwind {
731731
;
732732
; AVX512-LABEL: stest_f16i32:
733733
; AVX512: # %bb.0: # %entry
734-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
734+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
735735
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
736736
; AVX512-NEXT: vpmovsqd %ymm0, %xmm0
737737
; AVX512-NEXT: vzeroupper
@@ -894,7 +894,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) nounwind {
894894
;
895895
; AVX512-LABEL: utesth_f16i32:
896896
; AVX512: # %bb.0: # %entry
897-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
897+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
898898
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
899899
; AVX512-NEXT: vpmovusqd %ymm0, %xmm0
900900
; AVX512-NEXT: vzeroupper
@@ -1031,7 +1031,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) nounwind {
10311031
;
10321032
; AVX512-LABEL: ustest_f16i32:
10331033
; AVX512: # %bb.0: # %entry
1034-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
1034+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
10351035
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
10361036
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
10371037
; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0
@@ -3343,7 +3343,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) nounwind {
33433343
;
33443344
; AVX512-LABEL: stest_f16i32_mm:
33453345
; AVX512: # %bb.0: # %entry
3346-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
3346+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
33473347
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
33483348
; AVX512-NEXT: vpmovsqd %ymm0, %xmm0
33493349
; AVX512-NEXT: vzeroupper
@@ -3504,7 +3504,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) nounwind {
35043504
;
35053505
; AVX512-LABEL: utesth_f16i32_mm:
35063506
; AVX512: # %bb.0: # %entry
3507-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
3507+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
35083508
; AVX512-NEXT: vcvttps2uqq %ymm0, %zmm0
35093509
; AVX512-NEXT: vpmovusqd %ymm0, %xmm0
35103510
; AVX512-NEXT: vzeroupper
@@ -3640,7 +3640,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) nounwind {
36403640
;
36413641
; AVX512-LABEL: ustest_f16i32_mm:
36423642
; AVX512: # %bb.0: # %entry
3643-
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
3643+
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
36443644
; AVX512-NEXT: vcvttps2qq %ymm0, %zmm0
36453645
; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
36463646
; AVX512-NEXT: vpmaxsq %ymm1, %ymm0, %ymm0

llvm/test/CodeGen/X86/freeze-vector.ll

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -630,13 +630,8 @@ define void @pr59677(i32 %x, ptr %out) nounwind {
630630
; X86: # %bb.0:
631631
; X86-NEXT: pushl %esi
632632
; X86-NEXT: pushl %eax
633-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
634633
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
635-
; X86-NEXT: vmovd %eax, %xmm0
636-
; X86-NEXT: orl $1, %eax
637-
; X86-NEXT: vmovd %eax, %xmm1
638-
; X86-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
639-
; X86-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
634+
; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
640635
; X86-NEXT: vpaddd %xmm0, %xmm0, %xmm0
641636
; X86-NEXT: vcvtdq2ps %xmm0, %xmm0
642637
; X86-NEXT: vmovss %xmm0, (%esp)
@@ -651,10 +646,6 @@ define void @pr59677(i32 %x, ptr %out) nounwind {
651646
; X64-NEXT: pushq %rbx
652647
; X64-NEXT: movq %rsi, %rbx
653648
; X64-NEXT: vmovd %edi, %xmm0
654-
; X64-NEXT: orl $1, %edi
655-
; X64-NEXT: vmovd %edi, %xmm1
656-
; X64-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
657-
; X64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
658649
; X64-NEXT: vpaddd %xmm0, %xmm0, %xmm0
659650
; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
660651
; X64-NEXT: callq sinf@PLT

llvm/test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4966,17 +4966,13 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
49664966
;
49674967
; F16C-LABEL: fptosi_2f16_to_4i32:
49684968
; F16C: # %bb.0:
4969-
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
4970-
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
49714969
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
49724970
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
49734971
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
49744972
; F16C-NEXT: retq
49754973
;
49764974
; AVX512-LABEL: fptosi_2f16_to_4i32:
49774975
; AVX512: # %bb.0:
4978-
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
4979-
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
49804976
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
49814977
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
49824978
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
@@ -5104,8 +5100,6 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
51045100
;
51055101
; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32:
51065102
; AVX512-FASTLANE: # %bb.0:
5107-
; AVX512-FASTLANE-NEXT: vxorps %xmm1, %xmm1, %xmm1
5108-
; AVX512-FASTLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
51095103
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
51105104
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
51115105
; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
@@ -5212,7 +5206,7 @@ define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
52125206
;
52135207
; AVX512F-LABEL: fptoui_4f16_to_4i32:
52145208
; AVX512F: # %bb.0:
5215-
; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm0
5209+
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
52165210
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
52175211
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
52185212
; AVX512F-NEXT: vzeroupper

llvm/test/CodeGen/X86/widen_conv-3.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ define void @convert_v2i16_to_v2f32(ptr %dst.addr, <2 x i16> %src) nounwind {
1010
; X86-SSE2-LABEL: convert_v2i16_to_v2f32:
1111
; X86-SSE2: # %bb.0: # %entry
1212
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
13-
; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
13+
; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
1414
; X86-SSE2-NEXT: psrad $16, %xmm0
1515
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
1616
; X86-SSE2-NEXT: movlps %xmm0, (%eax)
@@ -26,7 +26,7 @@ define void @convert_v2i16_to_v2f32(ptr %dst.addr, <2 x i16> %src) nounwind {
2626
;
2727
; X64-SSE2-LABEL: convert_v2i16_to_v2f32:
2828
; X64-SSE2: # %bb.0: # %entry
29-
; X64-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
29+
; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
3030
; X64-SSE2-NEXT: psrad $16, %xmm0
3131
; X64-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
3232
; X64-SSE2-NEXT: movlps %xmm0, (%rdi)

0 commit comments

Comments
 (0)