Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit c7ebf69

Browse files
committed
[x86] don't try to create a vector integer inst for an SSE1 target (PR30512)
This bug was introduced with http://reviews.llvm.org/rL272511. We need to restrict the lowering to v4f32 comparisons because that's all SSE1 can handle. This should fix https://llvm.org/bugs/show_bug.cgi?id=30512. (git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282336 91177308-0d34-0410-b5e6-96231b3b80d8)
1 parent 3a296bc commit c7ebf69

File tree

2 files changed

+67
-3
lines changed

2 files changed

+67
-3
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31064,9 +31064,10 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
3106431064
}
3106531065
}
3106631066

31067-
// For an SSE1-only target, lower to X86ISD::CMPP early to avoid scalarization
31068-
// via legalization because v4i32 is not a legal type.
31069-
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32)
31067+
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
31068+
// to avoid scalarization via legalization because v4i32 is not a legal type.
31069+
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
31070+
LHS.getValueType() == MVT::v4f32)
3107031071
return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);
3107131072

3107231073
return SDValue();

test/CodeGen/X86/sse1.ll

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,66 @@ define <4 x float> @PR28044(<4 x float> %a0, <4 x float> %a1) nounwind {
148148
ret <4 x float> %res
149149
}
150150

151+
; Don't crash trying to do the impossible: an SSE1-only target has no integer vector comparison instruction, so we must scalarize.
152+
; https://llvm.org/bugs/show_bug.cgi?id=30512
153+
154+
define <4 x i32> @PR30512(<4 x i32> %x, <4 x i32> %y) nounwind {
155+
; X32-LABEL: PR30512:
156+
; X32: # BB#0:
157+
; X32-NEXT: pushl %ebp
158+
; X32-NEXT: pushl %ebx
159+
; X32-NEXT: pushl %edi
160+
; X32-NEXT: pushl %esi
161+
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
162+
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
163+
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
164+
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
165+
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
166+
; X32-NEXT: xorl %ecx, %ecx
167+
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edx
168+
; X32-NEXT: sete %cl
169+
; X32-NEXT: xorl %edx, %edx
170+
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ebx
171+
; X32-NEXT: sete %dl
172+
; X32-NEXT: xorl %ebx, %ebx
173+
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %edi
174+
; X32-NEXT: sete %bl
175+
; X32-NEXT: xorl %eax, %eax
176+
; X32-NEXT: cmpl {{[0-9]+}}(%esp), %esi
177+
; X32-NEXT: sete %al
178+
; X32-NEXT: movl %eax, 12(%ebp)
179+
; X32-NEXT: movl %ebx, 8(%ebp)
180+
; X32-NEXT: movl %edx, 4(%ebp)
181+
; X32-NEXT: movl %ecx, (%ebp)
182+
; X32-NEXT: movl %ebp, %eax
183+
; X32-NEXT: popl %esi
184+
; X32-NEXT: popl %edi
185+
; X32-NEXT: popl %ebx
186+
; X32-NEXT: popl %ebp
187+
; X32-NEXT: retl $4
188+
;
189+
; X64-LABEL: PR30512:
190+
; X64: # BB#0:
191+
; X64-NEXT: xorl %eax, %eax
192+
; X64-NEXT: cmpl %r9d, %esi
193+
; X64-NEXT: sete %al
194+
; X64-NEXT: xorl %esi, %esi
195+
; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %edx
196+
; X64-NEXT: sete %sil
197+
; X64-NEXT: xorl %edx, %edx
198+
; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %ecx
199+
; X64-NEXT: sete %dl
200+
; X64-NEXT: xorl %ecx, %ecx
201+
; X64-NEXT: cmpl {{[0-9]+}}(%rsp), %r8d
202+
; X64-NEXT: sete %cl
203+
; X64-NEXT: movl %ecx, 12(%rdi)
204+
; X64-NEXT: movl %edx, 8(%rdi)
205+
; X64-NEXT: movl %esi, 4(%rdi)
206+
; X64-NEXT: movl %eax, (%rdi)
207+
; X64-NEXT: movq %rdi, %rax
208+
; X64-NEXT: retq
209+
%cmp = icmp eq <4 x i32> %x, %y
210+
%zext = zext <4 x i1> %cmp to <4 x i32>
211+
ret <4 x i32> %zext
212+
}
213+

0 commit comments

Comments
 (0)