Skip to content

Commit b242e85

Browse files
[AArch64][NFC] Prepare test cases (for D128302) to show that more accurate cost estimation of extract-element could generate better assembly code.
Pre-commit the test cases (for D128302) to show that more accurate cost estimation of extract-element could generate better code. Differential Revision: https://reviews.llvm.org/D128945
1 parent 42e1035 commit b242e85

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed

llvm/test/Analysis/CostModel/AArch64/kryo.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,27 @@ define void @vectorInstrCost() {
2424

2525
ret void
2626
}
27+
28+
; CHECK-LABEL: vectorInstrExtractCost
29+
; Cost-model test for extractelement on each lane of a <4 x i64> argument.
; The add/icmp/select instructions only give the four extracted values a use;
; the directives below pin the reported cost per lane: 2 for lanes 1 and 3,
; and 0 for lanes 0 and 2.
define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) {
30+
31+
; Vector extracts - extracting the element at index 0 is considered
32+
; free in the current implementation. When extracting the element at index
33+
; 2, 2 is rounded to 0, so extracting the element at index 2 has cost 0 as
34+
; well.
; NOTE(review): the rounding presumably comes from <4 x i64> being split into
; two 2-lane halves during type legalization - confirm against the AArch64
; cost model.
35+
;
36+
; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1
37+
; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2
38+
%t1 = extractelement <4 x i64> %vecreg, i32 1
39+
%t2 = extractelement <4 x i64> %vecreg, i32 2
40+
%ele = add i64 %t2, 1
41+
%cond = icmp eq i64 %t1, %ele
42+
43+
; Lanes 0 and 3 follow the same pattern as lanes 2 and 1 above: the expected
; cost is 0 for lane 0 and 2 for lane 3.
; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0
44+
; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3
45+
%t0 = extractelement <4 x i64> %vecreg, i32 0
46+
%t3 = extractelement <4 x i64> %vecreg, i32 3
47+
; Consume all four extracted values so the function returns one of them.
%val = select i1 %cond, i64 %t0 , i64 %t3
48+
49+
ret i64 %val
50+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -licm -mtriple aarch64-linux-gnu -S < %s | FileCheck %s
3+
4+
; LICM test input (this file is run through opt -licm for aarch64-linux-gnu).
; The loop walks <1 x i64> elements starting at %0, with index %4 counting up
; from 0 while %4 is unsigned-less-than %1. Each iteration loads one vector,
; extracts lane 0, and keeps looping only while that value equals -1.
; The xor/add/icmp-uge chain (%11-%13) inside the loop is consumed only by the
; phi in exit block %16. The autogenerated assertions expect that chain to
; appear in .split.loop.exit after the loop, together with a second
; extractelement applied to an LCSSA phi of the loaded vector.
; NOTE(review): per the commit description this is a pre-commit baseline for
; D128302; presumably the duplicated extractelement is the codegen that change
; is meant to improve - confirm.
define i1 @func(ptr %0, i64 %1) {
5+
; CHECK-LABEL: @func(
6+
; CHECK-NEXT: br label [[TMP3:%.*]]
7+
; CHECK: 3:
8+
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[TMP12:%.*]], [[TMP11:%.*]] ]
9+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[TMP1:%.*]]
10+
; CHECK-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[DOTSPLIT_LOOP_EXIT2:%.*]]
11+
; CHECK: 6:
12+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <1 x i64>, ptr [[TMP0:%.*]], i64 [[TMP4]]
13+
; CHECK-NEXT: [[TMP8:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
14+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <1 x i64> [[TMP8]], i64 0
15+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], -1
16+
; CHECK-NEXT: br i1 [[TMP10]], label [[TMP11]], label [[DOTSPLIT_LOOP_EXIT:%.*]]
17+
; CHECK: 11:
18+
; CHECK-NEXT: [[TMP12]] = add i64 [[TMP4]], 1
19+
; CHECK-NEXT: br label [[TMP3]]
20+
; CHECK: .split.loop.exit:
21+
; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ]
22+
; CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ]
23+
; CHECK-NEXT: [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ]
24+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0
25+
; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], -1
26+
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]]
27+
; CHECK-NEXT: [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]]
28+
; CHECK-NEXT: br label [[TMP17:%.*]]
29+
; CHECK: .split.loop.exit2:
30+
; CHECK-NEXT: [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ]
31+
; CHECK-NEXT: [[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ]
32+
; CHECK-NEXT: br label [[TMP17]]
33+
; CHECK: 17:
34+
; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ]
35+
; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ]
36+
; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[TMP18]], true
37+
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]]
38+
; CHECK-NEXT: ret i1 [[TMP21]]
39+
;
40+
br label %3
41+
42+
; Loop header: %4 is the induction variable; leave for %16 once %4 >= %1
; (unsigned).
3: ; preds = %14, %2
43+
%4 = phi i64 [ 0, %2 ], [ %15, %14 ]
44+
%5 = icmp ult i64 %4, %1
45+
br i1 %5, label %6, label %16
46+
47+
; Loop body: load the %4-th <1 x i64> and test its only lane against -1.
6: ; preds = %3
48+
%7 = getelementptr inbounds <1 x i64>, ptr %0, i64 %4
49+
%8 = load <1 x i64>, ptr %7, align 8
50+
%9 = extractelement <1 x i64> %8, i64 0
51+
%10 = icmp eq i64 %9, -1
52+
; %11-%13 have no user inside the loop; %13 is read only by the phi %18 in the
; exit block, so these three instructions are the sinking candidates.
%11 = xor i64 %9, -1
53+
%12 = add i64 %11, %4
54+
%13 = icmp uge i64 %12, %1
55+
; Keep iterating while the element equals -1; any other value exits to %16.
br i1 %10, label %14, label %16
56+
57+
; Latch: advance the index and branch back to the header.
14: ; preds = %6
58+
%15 = add i64 %4, 1
59+
br label %3
60+
61+
; Common exit: %17 carries the header comparison %5 from either predecessor;
; %18 is %13 when leaving from the body and undef when leaving from the header.
16: ; preds = %3, %6
62+
%17 = phi i1 [ %5, %3 ], [ %5, %6 ]
63+
%18 = phi i1 [ %13, %6 ], [ undef, %3 ]
64+
%19 = xor i1 %17, true
65+
; Result is true when %17 is false, otherwise it is %18.
%20 = select i1 %19, i1 true, i1 %18
66+
ret i1 %20
67+
}

0 commit comments

Comments
 (0)