Skip to content

Commit e165bc2

Browse files
committed
[SLP][AArch64] Extend extracts-from-scalarizable-vector.ll test for cmp cost testing. NFC
See D153507. The existing test is over-simplified; as written, it should have been simplified prior to SLP vectorization. I have left it as-is to ensure the crash it was protecting against doesn't arise again. A new test with valid inputs is also added to show the incorrect costs of alt cmp vectorization.
1 parent 5537230 commit e165bc2

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

llvm/test/Transforms/SLPVectorizer/AArch64/extracts-from-scalarizable-vector.ll

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
22
; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
33

4-
define i1 @test() {
5-
; CHECK-LABEL: define i1 @test() {
4+
define i1 @degenerate() {
5+
; CHECK-LABEL: define i1 @degenerate() {
66
; CHECK-NEXT: entry:
77
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> zeroinitializer
88
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP0]])
@@ -20,3 +20,28 @@ entry:
2020
%or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false
2121
ret i1 %or.cond30
2222
}
23+
24+
; Lanes 0 and 1 of %a are each compared against an fp128 zero constant with
; both 'fcmp ogt' and 'fcmp olt'; the four i1 results are then combined by an
; 'and' followed by two 'select ..., i1 false' steps and returned.
; Per the commit description, this case exists to show the cost of alternate
; cmp vectorization. The CHECK lines below are autogenerated (see the NOTE at
; the top of the file) and should be regenerated rather than edited by hand.
define i1 @with_inputs(<4 x fp128> %a) {
25+
; CHECK-LABEL: define i1 @with_inputs
26+
; CHECK-SAME: (<4 x fp128> [[A:%.*]]) {
27+
; CHECK-NEXT: entry:
28+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x fp128> [[A]], <4 x fp128> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
29+
; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x fp128> [[TMP0]], zeroinitializer
30+
; CHECK-NEXT: [[TMP2:%.*]] = fcmp ogt <4 x fp128> [[TMP0]], zeroinitializer
31+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
32+
; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
33+
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
34+
; CHECK-NEXT: ret i1 [[TMP5]]
35+
;
36+
entry:
37+
%0 = extractelement <4 x fp128> %a, i32 0
38+
%cmp = fcmp ogt fp128 %0, 0xL00000000000000000000000000000000
39+
%cmp3 = fcmp olt fp128 %0, 0xL00000000000000000000000000000000
40+
%or.cond = and i1 %cmp, %cmp3
41+
%1 = extractelement <4 x fp128> %a, i32 1
42+
%cmp6 = fcmp ogt fp128 %1, 0xL00000000000000000000000000000000
43+
%or.cond29 = select i1 %or.cond, i1 %cmp6, i1 false
44+
%cmp10 = fcmp olt fp128 %1, 0xL00000000000000000000000000000000
45+
%or.cond30 = select i1 %or.cond29, i1 %cmp10, i1 false
46+
ret i1 %or.cond30
47+
}

0 commit comments

Comments
 (0)