Skip to content

Commit 36a484a

Browse files
committed
[InstCombine] Canonicalize scalable GEPs to use llvm.vscale intrinsic
Canonicalize getelementptr instructions for scalable vector types into a ptradd representation with an explicit llvm.vscale call. This representation has better support in BasicAA, which can reason about llvm.vscale but not about plain scalable GEPs.
1 parent b329179 commit 36a484a

File tree

8 files changed

+61
-21
lines changed

8 files changed

+61
-21
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,6 +2787,14 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
27872787
GEP.isInBounds()));
27882788
}
27892789

2790+
// Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
2791+
// intrinsic. This has better support in BasicAA.
2792+
if (IsGEPSrcEleScalable) {
2793+
Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
2794+
return replaceInstUsesWith(
2795+
GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.isInBounds()));
2796+
}
2797+
27902798
// Check to see if the inputs to the PHI node are getelementptr instructions.
27912799
if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
27922800
auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));

llvm/test/Analysis/ValueTracking/phi-known-bits.ll

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -932,9 +932,11 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP(ptr %val1) {
932932
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
933933
; CHECK: while.cond.i:
934934
; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
935-
; CHECK-NEXT: [[TEST_0_I]] = getelementptr <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
936-
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
937-
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
935+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
936+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
937+
; CHECK-NEXT: [[TEST_0_I]] = getelementptr i8, ptr [[A_PN_I]], i64 [[TMP1]]
938+
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
939+
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
938940
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
939941
; CHECK: while.end.i:
940942
; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]
@@ -964,9 +966,11 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP_inbounds(ptr %val1) {
964966
; CHECK-NEXT: br label [[WHILE_COND_I:%.*]]
965967
; CHECK: while.cond.i:
966968
; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
967-
; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds <vscale x 16 x i8>, ptr [[A_PN_I]], i64 1
968-
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[TEST_0_I]], align 1
969-
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
969+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
970+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
971+
; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 [[TMP1]]
972+
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
973+
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
970974
; CHECK-NEXT: br i1 [[CMP3_NOT_I]], label [[WHILE_END_I:%.*]], label [[WHILE_COND_I]]
971975
; CHECK: while.end.i:
972976
; CHECK-NEXT: [[BOOL:%.*]] = icmp eq ptr [[TEST_0_I]], [[VAL1]]

llvm/test/Transforms/InstCombine/gep-vector.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,10 @@ define ptr addrspace(3) @inbounds_bitcast_vec_to_array_addrspace_matching_alloc_
127127

128128
define ptr @test_accumulate_constant_offset_vscale_nonzero(<vscale x 16 x i1> %pg, ptr %base) {
129129
; CHECK-LABEL: @test_accumulate_constant_offset_vscale_nonzero(
130-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[BASE:%.*]], i64 1, i64 4
130+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
131+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
132+
; CHECK-NEXT: [[GEP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 4
133+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[GEP_OFFS]]
131134
; CHECK-NEXT: ret ptr [[GEP]]
132135
;
133136
%gep = getelementptr <vscale x 16 x i8>, ptr %base, i64 1, i64 4

llvm/test/Transforms/InstCombine/gepofconstgepi8.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,11 @@ define ptr @test_scalable(ptr %base, i64 %a) {
280280
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
281281
; CHECK-NEXT: entry:
282282
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
283-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P1]], i64 [[A]]
284-
; CHECK-NEXT: [[P2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[TMP0]], i64 1
283+
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1
284+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
285+
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
286+
; CHECK-NEXT: [[P2_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]]
287+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P1]], i64 [[P2_IDX]]
285288
; CHECK-NEXT: ret ptr [[P2]]
286289
;
287290
entry:

llvm/test/Transforms/InstCombine/icmp-gep.ll

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,14 @@ define void @test60_extra_use_fold(ptr %foo, i64 %start.idx, i64 %end.offset) {
472472

473473
define i1 @test_scalable_same(ptr %x) {
474474
; CHECK-LABEL: @test_scalable_same(
475-
; CHECK-NEXT: ret i1 false
475+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
476+
; CHECK-NEXT: [[A_IDX:%.*]] = shl i64 [[TMP1]], 5
477+
; CHECK-NEXT: [[A:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[A_IDX]]
478+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
479+
; CHECK-NEXT: [[B_IDX:%.*]] = shl i64 [[TMP2]], 5
480+
; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[B_IDX]]
481+
; CHECK-NEXT: [[C:%.*]] = icmp ugt ptr [[A]], [[B]]
482+
; CHECK-NEXT: ret i1 [[C]]
476483
;
477484
%a = getelementptr <vscale x 4 x i8>, ptr %x, i64 8
478485
%b = getelementptr inbounds <vscale x 4 x i8>, ptr %x, i64 8
@@ -507,11 +514,11 @@ define i1 @test_scalable_xc(ptr %x) {
507514
define i1 @test_scalable_xy(ptr %foo, i64 %i, i64 %j) {
508515
; CHECK-LABEL: @test_scalable_xy(
509516
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
510-
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 2
511-
; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP2]], [[J:%.*]]
517+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
518+
; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP2]], [[I:%.*]]
512519
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
513-
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 4
514-
; CHECK-NEXT: [[GEP1_IDX:%.*]] = mul nsw i64 [[TMP4]], [[I:%.*]]
520+
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 2
521+
; CHECK-NEXT: [[GEP2_IDX:%.*]] = mul nsw i64 [[TMP4]], [[J:%.*]]
515522
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[GEP2_IDX]], [[GEP1_IDX]]
516523
; CHECK-NEXT: ret i1 [[CMP]]
517524
;

llvm/test/Transforms/InstCombine/opaque-ptr.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,9 @@ define ptr @geps_combinable_different_elem_type_extra_use2(ptr %a, i64 %idx) {
289289

290290
define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
291291
; CHECK-LABEL: @geps_combinable_scalable(
292-
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds <vscale x 2 x i32>, ptr [[A:%.*]], i64 1
292+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
293+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 3
294+
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
293295
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
294296
; CHECK-NEXT: ret ptr [[A3]]
295297
;
@@ -300,7 +302,9 @@ define ptr @geps_combinable_scalable(ptr %a, i64 %idx) {
300302

301303
define ptr @geps_combinable_scalable_vector_array(ptr %a, i64 %idx) {
302304
; CHECK-LABEL: @geps_combinable_scalable_vector_array(
303-
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds [4 x <vscale x 2 x i32>], ptr [[A:%.*]], i64 1
305+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
306+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 5
307+
; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
304308
; CHECK-NEXT: [[A3:%.*]] = getelementptr inbounds i8, ptr [[A2]], i64 4
305309
; CHECK-NEXT: ret ptr [[A3]]
306310
;

llvm/test/Transforms/InstCombine/scalable-vector-array.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
define <vscale x 4 x i32> @load(ptr %x) {
55
; CHECK-LABEL: define <vscale x 4 x i32> @load
66
; CHECK-SAME: (ptr [[X:%.*]]) {
7-
; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
7+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
8+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
9+
; CHECK-NEXT: [[A_ELT1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
810
; CHECK-NEXT: [[A_UNPACK2:%.*]] = load <vscale x 4 x i32>, ptr [[A_ELT1]], align 16
911
; CHECK-NEXT: ret <vscale x 4 x i32> [[A_UNPACK2]]
1012
;
@@ -17,7 +19,9 @@ define void @store(ptr %x, <vscale x 4 x i32> %y, <vscale x 4 x i32> %z) {
1719
; CHECK-LABEL: define void @store
1820
; CHECK-SAME: (ptr [[X:%.*]], <vscale x 4 x i32> [[Y:%.*]], <vscale x 4 x i32> [[Z:%.*]]) {
1921
; CHECK-NEXT: store <vscale x 4 x i32> [[Y]], ptr [[X]], align 16
20-
; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds [2 x <vscale x 4 x i32>], ptr [[X]], i64 0, i64 1
22+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
23+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
24+
; CHECK-NEXT: [[X_REPACK1:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[TMP2]]
2125
; CHECK-NEXT: store <vscale x 4 x i32> [[Z]], ptr [[X_REPACK1]], align 16
2226
; CHECK-NEXT: ret void
2327
;

llvm/test/Transforms/InstCombine/vscale_gep.ll

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ define <vscale x 2 x ptr> @gep_index_type_is_scalable(ptr %p) {
1414
; This test serves to verify code changes for "GEP.getNumIndices() == 1".
1515
define ptr @gep_num_of_indices_1(ptr %p) {
1616
; CHECK-LABEL: @gep_num_of_indices_1(
17-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P:%.*]], i64 1
17+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
18+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
19+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP2]]
1820
; CHECK-NEXT: ret ptr [[GEP]]
1921
;
2022
%gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 1
@@ -25,7 +27,9 @@ define ptr @gep_num_of_indices_1(ptr %p) {
2527
define void @gep_bitcast(ptr %p) {
2628
; CHECK-LABEL: @gep_bitcast(
2729
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[P:%.*]], align 16
28-
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P]], i64 1
30+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
31+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
32+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP2]]
2933
; CHECK-NEXT: store <vscale x 16 x i8> zeroinitializer, ptr [[GEP2]], align 16
3034
; CHECK-NEXT: ret void
3135
;
@@ -54,7 +58,10 @@ define i32 @gep_alloca_inbounds_vscale_zero() {
5458
define i32 @gep_alloca_inbounds_vscale_nonzero() {
5559
; CHECK-LABEL: @gep_alloca_inbounds_vscale_nonzero(
5660
; CHECK-NEXT: [[A:%.*]] = alloca <vscale x 4 x i32>, align 16
57-
; CHECK-NEXT: [[TMP:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[A]], i64 1, i64 2
61+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
62+
; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP1]], 4
63+
; CHECK-NEXT: [[TMP_OFFS:%.*]] = or disjoint i64 [[TMP2]], 8
64+
; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP_OFFS]]
5865
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[TMP]], align 4
5966
; CHECK-NEXT: ret i32 [[LOAD]]
6067
;

0 commit comments

Comments
 (0)