Skip to content

Commit f038db3

Browse files
lfmeadow authored and ronlieb committed
Fix for performance drops in rocFFT for AMDGPU. Revert e13bed4. This also mitigates regressions in the kripke benchmark.
Change-Id: I96a5830433b4e325038e6d06b01af73eb2768a7c
1 parent be063d4 commit f038db3

File tree

18 files changed

+324
-321
lines changed

18 files changed

+324
-321
lines changed

clang/test/CodeGenCXX/microsoft-abi-dynamic-cast.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ void* test9(B* x) { return dynamic_cast<void*>(x); }
9494
// CHECK-NEXT: [[VBTBL:%.*]] = load ptr, ptr [[VBPTR]], align 4
9595
// CHECK-NEXT: [[VBOFFP:%.*]] = getelementptr inbounds i8, ptr [[VBTBL]], i32 4
9696
// CHECK-NEXT: [[VBOFFS:%.*]] = load i32, ptr [[VBOFFP]], align 4
97-
// CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr %x, i32 [[VBOFFS]]
98-
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr i8, ptr [[BASE]], i32 4
99-
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__RTCastToVoid(ptr [[ADJ]])
97+
// CHECK-NEXT: [[DELTA:%.*]] = add nsw i32 [[VBOFFS]], 4
98+
// CHECK-NEXT: [[ADJ:%.*]] = getelementptr inbounds i8, ptr %x, i32 [[DELTA]]
99+
// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__RTCastToVoid(ptr nonnull [[ADJ]])
100100
// CHECK-NEXT: br label
101101
// CHECK: [[RET:%.*]] = phi ptr
102102
// CHECK-NEXT: ret ptr [[RET]]

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2947,10 +2947,12 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
29472947
}
29482948
}
29492949
}
2950+
29502951
// We do not handle pointer-vector geps here.
29512952
if (GEPType->isVectorTy())
29522953
return nullptr;
29532954

2955+
#if 0//<<<<<<< HEAD
29542956
if (GEP.getNumIndices() == 1) {
29552957
// We can only preserve inbounds if the original gep is inbounds, the add
29562958
// is nsw, and the add operands are non-negative.
@@ -3002,6 +3004,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
30023004
}
30033005
}
30043006

3007+
#endif//>>>>>>> parent of e13bed4c5f35 ([PATCH] [llvm] [InstCombine] Canonicalise ADD+GEP)
30053008
if (!GEP.isInBounds()) {
30063009
unsigned IdxWidth =
30073010
DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());

llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,22 @@ target triple = "hexagon"
1212
define <64 x i16> @f0(ptr %a0, i32 %a1) #0 {
1313
; CHECK-LABEL: @f0(
1414
; CHECK-NEXT: b0:
15-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[A1:%.*]]
16-
; CHECK-NEXT: [[V1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
15+
; CHECK-NEXT: [[V0:%.*]] = add i32 [[A1:%.*]], 64
16+
; CHECK-NEXT: [[V1:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[V0]]
1717
; CHECK-NEXT: [[PTI:%.*]] = ptrtoint ptr [[V1]] to i32
18-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[PTI]], -128
19-
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[AND]] to ptr
18+
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
19+
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
2020
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
2121
; CHECK-NEXT: [[ALD14:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0:![0-9]+]]
2222
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ITP]], i32 128
2323
; CHECK-NEXT: [[ALD2:%.*]] = load <128 x i8>, ptr [[GEP]], align 128, !tbaa [[TBAA0]]
2424
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
25-
; CHECK-NEXT: [[AND4:%.*]] = and i32 [[PTI1]], 127
26-
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[AND4]], 0
25+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
26+
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
2727
; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP3]], i32 0), !tbaa [[TBAA0]]
28-
; CHECK-NEXT: [[CST5:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
29-
; CHECK-NEXT: [[CUP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST5]], <32 x i32> [[ALD14]], i32 [[PTI1]])
30-
; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP7]] to <64 x i16>
28+
; CHECK-NEXT: [[CST4:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
29+
; CHECK-NEXT: [[CUP6:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST4]], <32 x i32> [[ALD14]], i32 [[PTI1]])
30+
; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP6]] to <64 x i16>
3131
; CHECK-NEXT: [[CST9:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
3232
; CHECK-NEXT: [[CUP10:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CUP]], <32 x i32> [[CST9]], i32 [[PTI1]])
3333
; CHECK-NEXT: [[CST13:%.*]] = bitcast <32 x i32> [[CUP10]] to <64 x i16>
@@ -50,22 +50,22 @@ b0:
5050
define <64 x i16> @f1(ptr %a0, i32 %a1) #0 {
5151
; CHECK-LABEL: @f1(
5252
; CHECK-NEXT: b0:
53-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[A1:%.*]]
54-
; CHECK-NEXT: [[V1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
53+
; CHECK-NEXT: [[V0:%.*]] = add i32 [[A1:%.*]], 64
54+
; CHECK-NEXT: [[V1:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[V0]]
5555
; CHECK-NEXT: [[PTI:%.*]] = ptrtoint ptr [[V1]] to i32
56-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[PTI]], -128
57-
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[AND]] to ptr
56+
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
57+
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
5858
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
5959
; CHECK-NEXT: [[ALD14:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0]]
6060
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ITP]], i32 128
6161
; CHECK-NEXT: [[ALD2:%.*]] = load <128 x i8>, ptr [[GEP]], align 128
6262
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
63-
; CHECK-NEXT: [[AND4:%.*]] = and i32 [[PTI1]], 127
64-
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[AND4]], 0
63+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
64+
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
6565
; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP3]], i32 0)
66-
; CHECK-NEXT: [[CST5:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
67-
; CHECK-NEXT: [[CUP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST5]], <32 x i32> [[ALD14]], i32 [[PTI1]])
68-
; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP7]] to <64 x i16>
66+
; CHECK-NEXT: [[CST4:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
67+
; CHECK-NEXT: [[CUP6:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST4]], <32 x i32> [[ALD14]], i32 [[PTI1]])
68+
; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP6]] to <64 x i16>
6969
; CHECK-NEXT: [[CST9:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
7070
; CHECK-NEXT: [[CUP10:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CUP]], <32 x i32> [[CST9]], i32 [[PTI1]])
7171
; CHECK-NEXT: [[CST13:%.*]] = bitcast <32 x i32> [[CUP10]] to <64 x i16>
@@ -88,22 +88,22 @@ b0:
8888
define <64 x i16> @f2(ptr %a0, i32 %a1) #0 {
8989
; CHECK-LABEL: @f2(
9090
; CHECK-NEXT: b0:
91-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[A1:%.*]]
92-
; CHECK-NEXT: [[V1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
91+
; CHECK-NEXT: [[V0:%.*]] = add i32 [[A1:%.*]], 64
92+
; CHECK-NEXT: [[V1:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[V0]]
9393
; CHECK-NEXT: [[PTI:%.*]] = ptrtoint ptr [[V1]] to i32
94-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[PTI]], -128
95-
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[AND]] to ptr
94+
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
95+
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
9696
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
9797
; CHECK-NEXT: [[ALD14:%.*]] = load <32 x i32>, ptr [[ITP]], align 128, !tbaa [[TBAA0]]
9898
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[ITP]], i32 128
9999
; CHECK-NEXT: [[ALD2:%.*]] = load <128 x i8>, ptr [[GEP]], align 128
100100
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
101-
; CHECK-NEXT: [[AND4:%.*]] = and i32 [[PTI1]], 127
102-
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[AND4]], 0
101+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
102+
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
103103
; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP3]], i32 0), !tbaa [[TBAA3:![0-9]+]]
104-
; CHECK-NEXT: [[CST5:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
105-
; CHECK-NEXT: [[CUP7:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST5]], <32 x i32> [[ALD14]], i32 [[PTI1]])
106-
; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP7]] to <64 x i16>
104+
; CHECK-NEXT: [[CST4:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
105+
; CHECK-NEXT: [[CUP6:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CST4]], <32 x i32> [[ALD14]], i32 [[PTI1]])
106+
; CHECK-NEXT: [[CST12:%.*]] = bitcast <32 x i32> [[CUP6]] to <64 x i16>
107107
; CHECK-NEXT: [[CST9:%.*]] = bitcast <128 x i8> [[ALD2]] to <32 x i32>
108108
; CHECK-NEXT: [[CUP10:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[CUP]], <32 x i32> [[CST9]], i32 [[PTI1]])
109109
; CHECK-NEXT: [[CST13:%.*]] = bitcast <32 x i32> [[CUP10]] to <64 x i16>
@@ -126,11 +126,11 @@ b0:
126126
define void @f3(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
127127
; CHECK-LABEL: @f3(
128128
; CHECK-NEXT: b0:
129-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[A1:%.*]]
130-
; CHECK-NEXT: [[V1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
129+
; CHECK-NEXT: [[V0:%.*]] = add i32 [[A1:%.*]], 64
130+
; CHECK-NEXT: [[V1:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[V0]]
131131
; CHECK-NEXT: [[PTI:%.*]] = ptrtoint ptr [[V1]] to i32
132-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[PTI]], -128
133-
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[AND]] to ptr
132+
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
133+
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
134134
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
135135
; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
136136
; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]])
@@ -154,14 +154,14 @@ define void @f3(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
154154
; CHECK-NEXT: [[TRN18:%.*]] = trunc <128 x i8> [[CST12]] to <128 x i1>
155155
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST10]], ptr [[GEP]], i32 128, <128 x i1> [[TRN18]]), !tbaa [[TBAA5]]
156156
; CHECK-NEXT: [[GEP19:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
157-
; CHECK-NEXT: [[AND20:%.*]] = and i32 [[PTI1]], 127
158-
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[AND20]], 0
159-
; CHECK-NEXT: [[TRN21:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
160-
; CHECK-NEXT: [[CUP22:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA5]]
161-
; CHECK-NEXT: [[CST23:%.*]] = bitcast <32 x i32> [[CUP22]] to <128 x i8>
162-
; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN21]], <128 x i8> [[CST15]], <128 x i8> [[CST23]]
163-
; CHECK-NEXT: [[CST24:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
164-
; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST24]]), !tbaa [[TBAA5]]
157+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
158+
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
159+
; CHECK-NEXT: [[TRN20:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
160+
; CHECK-NEXT: [[CUP21:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA5]]
161+
; CHECK-NEXT: [[CST22:%.*]] = bitcast <32 x i32> [[CUP21]] to <128 x i8>
162+
; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN20]], <128 x i8> [[CST15]], <128 x i8> [[CST22]]
163+
; CHECK-NEXT: [[CST23:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
164+
; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST23]]), !tbaa [[TBAA5]]
165165
; CHECK-NEXT: ret void
166166
;
167167
b0:
@@ -179,11 +179,11 @@ b0:
179179
define void @f4(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
180180
; CHECK-LABEL: @f4(
181181
; CHECK-NEXT: b0:
182-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[A1:%.*]]
183-
; CHECK-NEXT: [[V1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
182+
; CHECK-NEXT: [[V0:%.*]] = add i32 [[A1:%.*]], 64
183+
; CHECK-NEXT: [[V1:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[V0]]
184184
; CHECK-NEXT: [[PTI:%.*]] = ptrtoint ptr [[V1]] to i32
185-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[PTI]], -128
186-
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[AND]] to ptr
185+
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
186+
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
187187
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
188188
; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
189189
; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]])
@@ -207,14 +207,14 @@ define void @f4(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
207207
; CHECK-NEXT: [[TRN18:%.*]] = trunc <128 x i8> [[CST12]] to <128 x i1>
208208
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST10]], ptr [[GEP]], i32 128, <128 x i1> [[TRN18]])
209209
; CHECK-NEXT: [[GEP19:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
210-
; CHECK-NEXT: [[AND20:%.*]] = and i32 [[PTI1]], 127
211-
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[AND20]], 0
212-
; CHECK-NEXT: [[TRN21:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
213-
; CHECK-NEXT: [[CUP22:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA5]]
214-
; CHECK-NEXT: [[CST23:%.*]] = bitcast <32 x i32> [[CUP22]] to <128 x i8>
215-
; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN21]], <128 x i8> [[CST15]], <128 x i8> [[CST23]]
216-
; CHECK-NEXT: [[CST24:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
217-
; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST24]]), !tbaa [[TBAA5]]
210+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
211+
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
212+
; CHECK-NEXT: [[TRN20:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
213+
; CHECK-NEXT: [[CUP21:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA5]]
214+
; CHECK-NEXT: [[CST22:%.*]] = bitcast <32 x i32> [[CUP21]] to <128 x i8>
215+
; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN20]], <128 x i8> [[CST15]], <128 x i8> [[CST22]]
216+
; CHECK-NEXT: [[CST23:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
217+
; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST23]]), !tbaa [[TBAA5]]
218218
; CHECK-NEXT: ret void
219219
;
220220
b0:
@@ -232,11 +232,11 @@ b0:
232232
define void @f5(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
233233
; CHECK-LABEL: @f5(
234234
; CHECK-NEXT: b0:
235-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[A1:%.*]]
236-
; CHECK-NEXT: [[V1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128
235+
; CHECK-NEXT: [[V0:%.*]] = add i32 [[A1:%.*]], 64
236+
; CHECK-NEXT: [[V1:%.*]] = getelementptr i16, ptr [[A0:%.*]], i32 [[V0]]
237237
; CHECK-NEXT: [[PTI:%.*]] = ptrtoint ptr [[V1]] to i32
238-
; CHECK-NEXT: [[AND:%.*]] = and i32 [[PTI]], -128
239-
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[AND]] to ptr
238+
; CHECK-NEXT: [[ADD:%.*]] = and i32 [[PTI]], -128
239+
; CHECK-NEXT: [[ITP:%.*]] = inttoptr i32 [[ADD]] to ptr
240240
; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr [[V1]] to i32
241241
; CHECK-NEXT: [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
242242
; CHECK-NEXT: [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]])
@@ -260,14 +260,14 @@ define void @f5(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
260260
; CHECK-NEXT: [[TRN18:%.*]] = trunc <128 x i8> [[CST12]] to <128 x i1>
261261
; CHECK-NEXT: call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST10]], ptr [[GEP]], i32 128, <128 x i1> [[TRN18]])
262262
; CHECK-NEXT: [[GEP19:%.*]] = getelementptr i8, ptr [[ITP]], i32 256
263-
; CHECK-NEXT: [[AND20:%.*]] = and i32 [[PTI1]], 127
264-
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[AND20]], 0
265-
; CHECK-NEXT: [[TRN21:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
266-
; CHECK-NEXT: [[CUP22:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA7:![0-9]+]]
267-
; CHECK-NEXT: [[CST23:%.*]] = bitcast <32 x i32> [[CUP22]] to <128 x i8>
268-
; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN21]], <128 x i8> [[CST15]], <128 x i8> [[CST23]]
269-
; CHECK-NEXT: [[CST24:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
270-
; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST24]]), !tbaa [[TBAA7]]
263+
; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[PTI1]], 127
264+
; CHECK-NEXT: [[ISZ:%.*]] = icmp ne i32 [[TMP0]], 0
265+
; CHECK-NEXT: [[TRN20:%.*]] = trunc <128 x i8> [[CST17]] to <128 x i1>
266+
; CHECK-NEXT: [[CUP21:%.*]] = call <32 x i32> @llvm.hexagon.V6.vL32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0), !tbaa [[TBAA7:![0-9]+]]
267+
; CHECK-NEXT: [[CST22:%.*]] = bitcast <32 x i32> [[CUP21]] to <128 x i8>
268+
; CHECK-NEXT: [[TMP1:%.*]] = select <128 x i1> [[TRN20]], <128 x i8> [[CST15]], <128 x i8> [[CST22]]
269+
; CHECK-NEXT: [[CST23:%.*]] = bitcast <128 x i8> [[TMP1]] to <32 x i32>
270+
; CHECK-NEXT: call void @llvm.hexagon.V6.vS32b.pred.ai.128B(i1 [[ISZ]], ptr [[GEP19]], i32 0, <32 x i32> [[CST23]]), !tbaa [[TBAA7]]
271271
; CHECK-NEXT: ret void
272272
;
273273
b0:

llvm/test/Transforms/InstCombine/align-addr.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
33
target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
44

5-
; Instcombine should be able to prove vector alignment in the
6-
; presence of a few mild address computation tricks.
7-
85
define void @test0(ptr %b, i64 %n, i64 %u, i64 %y) nounwind {
96
; CHECK-LABEL: @test0(
107
; CHECK-NEXT: entry:
@@ -18,8 +15,8 @@ define void @test0(ptr %b, i64 %n, i64 %u, i64 %y) nounwind {
1815
; CHECK: bb:
1916
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BB]] ], [ 20, [[ENTRY:%.*]] ]
2017
; CHECK-NEXT: [[J:%.*]] = mul i64 [[I]], [[V]]
21-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[E]], i64 [[J]]
22-
; CHECK-NEXT: [[T8:%.*]] = getelementptr double, ptr [[TMP0]], i64 [[Z]]
18+
; CHECK-NEXT: [[H:%.*]] = add i64 [[J]], [[Z]]
19+
; CHECK-NEXT: [[T8:%.*]] = getelementptr double, ptr [[E]], i64 [[H]]
2320
; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[T8]], align 8
2421
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[I]], 1
2522
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[N]]

0 commit comments

Comments
 (0)