@@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux-gnu"
 ; the loop, preventing the gep (and consequently the loop induction
 ; update variable) from being classified as 'uniform'.

-define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
+define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 {
 ; CHECK-LABEL: @test_no_scalarization(
 ; CHECK-NEXT:  L.entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i32 [[IDX:%.*]], 1
@@ -39,21 +39,26 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]]
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP14]], align 8
-; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[TMP16:%.*]] = mul i32 [[TMP15]], 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP16]]
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK:       middle.block:
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[IDX]], [[INDEX]]
+; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP12]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0
+; CHECK-NEXT:    store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP17]], align 8
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[TMP18]], 2
-; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[TMP19]], 1
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 [[TMP20]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]]
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP22:%.*]] = mul i32 [[TMP21]], 2
+; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 [[TMP22]], 1
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 [[TMP23]]
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -62,12 +67,14 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 {
 ; CHECK:       L.LoopBody:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[L_LOOPBODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[INDVAR_NEXT]] = add nsw i32 [[INDVAR]], 1
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]]
-; CHECK-NEXT:    [[TMP23:%.*]] = load double, ptr [[TMP22]], align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[TMP24]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]]
+; CHECK-NEXT:    [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8
+; CHECK-NEXT:    [[GEP_B:%.*]] = getelementptr i64, ptr [[B]], i32 [[INDVAR]]
+; CHECK-NEXT:    store double [[TMP26]], ptr [[GEP_B]], align 8
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[TMP27]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       L.exit:
-; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi ptr [ [[TMP22]], [[L_LOOPBODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi ptr [ [[TMP25]], [[L_LOOPBODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    store i64 1, ptr [[DOTLCSSA]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -79,6 +86,8 @@ L.LoopBody: ; preds = %L.LoopBody, %L.entr
   %indvar.next = add nsw i32 %indvar, 1
   %0 = getelementptr i64, ptr %a, i32 %indvar
   %1 = load double, ptr %0, align 8
+  %gep.b = getelementptr i64, ptr %b, i32 %indvar
+  store double %1, ptr %gep.b
   %2 = icmp slt i32 %indvar.next, %n
   br i1 %2, label %L.LoopBody, label %L.exit
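
For orientation: the change threads a second parameter, ptr noalias %b, through the test and stores each value loaded from %a into the matching element of %b, both in the scalar IR and in the expected vectorized output, while the address into %a from the final iteration is still carried out of the loop through the lcssa phi and written to in L.exit. A rough C analogue of the updated scalar loop is sketched below (hand-written; the names, the loop guard, and the handling of the IR's i64-typed geps with double-typed loads are approximations, not code from the commit):

    /* Hand-written sketch of the updated loop, not code from the commit. */
    void test_no_scalarization(long *a, long *b /* new noalias parameter */,
                               int idx, int n) {
      long *p;                     /* the gep into a, live-out of the loop */
      int i = idx;
      do {
        p = &a[i];                 /* address recomputed every iteration */
        double v = *(double *)p;   /* load double through the i64-typed gep */
        *(double *)&b[i] = v;      /* new: the loaded value now feeds a store to b */
        i++;
      } while (i < n);
      *p = 1;                      /* using the last-iteration pointer after the
                                      loop keeps the gep from being 'uniform' */
    }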