[LLVM][IRBuilder] Use NUW arithmetic for Create{ElementCount,TypeSize}. #143532
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms

Author: Paul Walker (paulwalker-arm)

Changes

This puts the onus on the caller to ensure the result type is big enough. In the unlikely event a cropped result is required, explicitly truncate a safe value.

Patch is 891.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143532.diff

132 Files Affected:
diff --git a/clang/test/CodeGen/builtin_vectorelements.c b/clang/test/CodeGen/builtin_vectorelements.c
index b0ff6f83b1e4a..45f7a3c34562b 100644
--- a/clang/test/CodeGen/builtin_vectorelements.c
+++ b/clang/test/CodeGen/builtin_vectorelements.c
@@ -85,7 +85,7 @@ int test_builtin_vectorelements_neon64x1() {
long test_builtin_vectorelements_sve32() {
// SVE: i64 @test_builtin_vectorelements_sve32(
// SVE: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
- // SVE: [[RES:%.+]] = mul i64 [[VSCALE]], 4
+ // SVE: [[RES:%.+]] = mul nuw i64 [[VSCALE]], 4
// SVE: ret i64 [[RES]]
return __builtin_vectorelements(svuint32_t);
}
@@ -93,7 +93,7 @@ long test_builtin_vectorelements_sve32() {
long test_builtin_vectorelements_sve8() {
// SVE: i64 @test_builtin_vectorelements_sve8(
// SVE: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
- // SVE: [[RES:%.+]] = mul i64 [[VSCALE]], 16
+ // SVE: [[RES:%.+]] = mul nuw i64 [[VSCALE]], 16
// SVE: ret i64 [[RES]]
return __builtin_vectorelements(svuint8_t);
}
@@ -105,7 +105,7 @@ long test_builtin_vectorelements_sve8() {
long test_builtin_vectorelements_riscv8() {
// RISCV: i64 @test_builtin_vectorelements_riscv8(
// RISCV: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
- // RISCV: [[RES:%.+]] = mul i64 [[VSCALE]], 8
+ // RISCV: [[RES:%.+]] = mul nuw i64 [[VSCALE]], 8
// RISCV: ret i64 [[RES]]
return __builtin_vectorelements(vuint8m1_t);
}
@@ -120,7 +120,7 @@ long test_builtin_vectorelements_riscv64() {
long test_builtin_vectorelements_riscv32m2() {
// RISCV: i64 @test_builtin_vectorelements_riscv32m2(
// RISCV: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64()
- // RISCV: [[RES:%.+]] = mul i64 [[VSCALE]], 4
+ // RISCV: [[RES:%.+]] = mul nuw i64 [[VSCALE]], 4
// RISCV: ret i64 [[RES]]
return __builtin_vectorelements(vuint32m2_t);
}
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 59623b4295bb1..072507f599bf6 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -125,7 +125,7 @@ static Value *CreateVScaleMultiple(IRBuilderBase &B, Type *Ty, uint64_t Scale) {
if (Scale == 1)
return VScale;
- return B.CreateMul(VScale, ConstantInt::get(Ty, Scale));
+ return B.CreateNUWMul(VScale, ConstantInt::get(Ty, Scale));
}
Value *IRBuilderBase::CreateElementCount(Type *Ty, ElementCount EC) {
diff --git a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
index 436aadbc25de6..b8a5be5834993 100644
--- a/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
+++ b/llvm/test/Analysis/ValueTracking/phi-known-bits.ll
@@ -936,7 +936,7 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP(ptr %val1) {
; CHECK: while.cond.i:
; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NEXT: [[TEST_0_I]] = getelementptr i8, ptr [[A_PN_I]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
@@ -970,7 +970,7 @@ define i1 @recursiveGEP_withPtrSub_scalableGEP_inbounds(ptr %val1) {
; CHECK: while.cond.i:
; CHECK-NEXT: [[A_PN_I:%.*]] = phi ptr [ [[TEST_0_I:%.*]], [[WHILE_COND_I]] ], [ [[VAL1:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 4
; CHECK-NEXT: [[TEST_0_I]] = getelementptr inbounds i8, ptr [[A_PN_I]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TEST_0_I]], align 1
; CHECK-NEXT: [[CMP3_NOT_I:%.*]] = icmp eq i8 [[TMP2]], 0
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
index a8da5a3740e59..afa3d0966b555 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll
@@ -315,7 +315,7 @@ declare void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float>, ptr, i32, <vsca
define <vscale x 4 x float> @scalable.load.nxv4f32(ptr %p, <vscale x 4 x i1> %mask) sanitize_address {
; CHECK-LABEL: @scalable.load.nxv4f32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
@@ -345,7 +345,7 @@ define <vscale x 4 x float> @scalable.load.nxv4f32(ptr %p, <vscale x 4 x i1> %ma
define void @scalable.store.nxv4f32(ptr %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask) sanitize_address {
; CHECK-LABEL: @scalable.store.nxv4f32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
@@ -379,7 +379,7 @@ declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x
define <vscale x 4 x float> @scalable.gather.nxv4f32(<vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.gather.nxv4f32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
@@ -409,7 +409,7 @@ define <vscale x 4 x float> @scalable.gather.nxv4f32(<vscale x 4 x ptr> %vp, <vs
define void @scalable.scatter.nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %vp, <vscale x 4 x i1> %mask, i32 %evl) sanitize_address {
; CHECK-LABEL: @scalable.scatter.nxv4f32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
@@ -447,7 +447,7 @@ define <vscale x 4 x float> @scalable.expandload.nxv4f32(ptr align 4 %p, <vscale
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP6]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
@@ -485,7 +485,7 @@ define void @scalable.compressstore.nxv4f32(ptr align 4 %p, <vscale x 4 x float>
; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP13:%.*]]
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP6]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
index d22671aa84f86..f53aa44c8c90f 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vp-load-store.ll
@@ -255,7 +255,7 @@ define <vscale x 4 x float> @scalable.load.nxv4f32(ptr align 4 %p, <vscale x 4 x
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
@@ -292,7 +292,7 @@ define void @scalable.store.nxv4f32(ptr align 4 %p, <vscale x 4 x float> %arg, <
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
@@ -333,7 +333,7 @@ define <vscale x 4 x float> @scalable.strided.load.nxv4f32(ptr align 4 %p, i32 %
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
@@ -372,7 +372,7 @@ define void @scalable.strided.store.nxv4f32(<vscale x 4 x float> %arg, ptr align
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
@@ -412,7 +412,7 @@ define <vscale x 4 x float> @scalable.strided.load.nxv4f32.align(ptr align 4 %p,
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
@@ -454,7 +454,7 @@ define <vscale x 4 x float> @scalable.gather.nxv4f32(<vscale x 4 x ptr> %vp, <vs
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
@@ -491,7 +491,7 @@ define void @scalable.scatter.nxv4f32(<vscale x 4 x float> %arg, <vscale x 4 x p
; CHECK: 2:
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP3]], i64 [[TMP5]])
; CHECK-NEXT: br label [[DOTSPLIT:%.*]]
; CHECK: .split:
diff --git a/llvm/test/Instrumentation/AddressSanitizer/vector-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/vector-load-store.ll
index 120a4b235f850..373cb8a536a2e 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/vector-load-store.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/vector-load-store.ll
@@ -402,7 +402,7 @@ define void @store.v2i32.align8(ptr %p) sanitize_address {
define void @load.nxv1i32(ptr %p) sanitize_address {
; CHECK-LABEL: @load.nxv1i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -445,7 +445,7 @@ define void @load.nxv1i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @load.nxv1i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 32
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 32
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]])
@@ -459,7 +459,7 @@ define void @load.nxv1i32(ptr %p) sanitize_address {
define void @load.nxv2i32(ptr %p) sanitize_address {
; CHECK-LABEL: @load.nxv2i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 64
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 64
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -502,7 +502,7 @@ define void @load.nxv2i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @load.nxv2i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 64
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 64
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]])
@@ -516,7 +516,7 @@ define void @load.nxv2i32(ptr %p) sanitize_address {
define void @load.nxv4i32(ptr %p) sanitize_address {
; CHECK-LABEL: @load.nxv4i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 128
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 128
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -559,7 +559,7 @@ define void @load.nxv4i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @load.nxv4i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 128
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 128
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]])
@@ -573,7 +573,7 @@ define void @load.nxv4i32(ptr %p) sanitize_address {
define void @load.nxv8i32(ptr %p) sanitize_address {
; CHECK-LABEL: @load.nxv8i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 256
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 256
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -616,7 +616,7 @@ define void @load.nxv8i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @load.nxv8i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 256
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 256
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]])
@@ -630,7 +630,7 @@ define void @load.nxv8i32(ptr %p) sanitize_address {
define void @load.nxv16i32(ptr %p) sanitize_address {
; CHECK-LABEL: @load.nxv16i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 512
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 512
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -673,7 +673,7 @@ define void @load.nxv16i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @load.nxv16i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 512
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 512
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_loadN(i64 [[TMP4]], i64 [[TMP3]])
@@ -688,7 +688,7 @@ define void @load.nxv16i32(ptr %p) sanitize_address {
define void @store.nxv1i32(ptr %p) sanitize_address {
; CHECK-LABEL: @store.nxv1i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 32
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 32
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -731,7 +731,7 @@ define void @store.nxv1i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @store.nxv1i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 32
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 32
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]])
@@ -745,7 +745,7 @@ define void @store.nxv1i32(ptr %p) sanitize_address {
define void @store.nxv2i32(ptr %p) sanitize_address {
; CHECK-LABEL: @store.nxv2i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 64
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 64
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -788,7 +788,7 @@ define void @store.nxv2i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @store.nxv2i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 64
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 64
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]])
@@ -802,7 +802,7 @@ define void @store.nxv2i32(ptr %p) sanitize_address {
define void @store.nxv4i32(ptr %p) sanitize_address {
; CHECK-LABEL: @store.nxv4i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 128
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 128
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -845,7 +845,7 @@ define void @store.nxv4i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @store.nxv4i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 128
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 128
; CALLS-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CALLS-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CALLS-NEXT: call void @__asan_storeN(i64 [[TMP4]], i64 [[TMP3]])
@@ -859,7 +859,7 @@ define void @store.nxv4i32(ptr %p) sanitize_address {
define void @store.nxv8i32(ptr %p) sanitize_address {
; CHECK-LABEL: @store.nxv8i32(
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 256
+; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 256
; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], 1
@@ -902,7 +902,7 @@ define void @store.nxv8i32(ptr %p) sanitize_address {
;
; CALLS-LABEL: @store.nxv8i32(
; CALLS-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CALLS-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 256
+; CALLS-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 256
; CALLS-NEXT: [[TMP3:%.*]] ...
[truncated]
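A minimal caller-side sketch of the "explicitly truncate a safe value" pattern described above (hypothetical helper, not part of the patch; the IRBuilder calls themselves are existing API): request the element count in a type assumed to be big enough, then crop it explicitly if a narrower value is genuinely wanted.

// Illustrative sketch only; names are invented here. Assumes i64 is wide
// enough to hold the element count of a <vscale x 4 x ...> type, the same
// assumption existing callers already make.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *emitNarrowElementCount(IRBuilderBase &B) {
  ElementCount EC = ElementCount::getScalable(4);         // <vscale x 4 x ...>
  Value *Wide = B.CreateElementCount(B.getInt64Ty(), EC); // vscale * 4, now emitted with nuw
  return B.CreateTrunc(Wide, B.getInt32Ty());             // explicit, caller-controlled crop
}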
LGTM; I do think it's worth documenting the assumption/requirement about a sufficiently large type in the comments on the declarations for CreateElementCount and CreateTypeSize.
@@ -177,16 +177,16 @@ define void @add_unique_ind32(ptr noalias nocapture %a, i64 %n) {
; CHECK-LABEL: @add_unique_ind32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2
I guess the code here already implicitly assumes that the multiply does not overflow, but it is not immediately clear how this follows from the definition of llvm.vscale in LangRef, which only limits vscale to a positive constant:
vscale is a positive value that is constant throughout program execution, but is unknown at compile time. If the result value does not fit in the result type, then the result is a [poison value](https://llvm.org/docs/LangRef.html#poisonvalues).
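As a concrete, purely hypothetical instance of the concern: if a caller asked for the element count of <vscale x 16 x i8> as an i8, the builder would now emit mul nuw i8 %vscale, 16, which is poison for any vscale greater than 15, and nothing in the quoted LangRef wording by itself rules such values out.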
This PR is not changing the definition of vscale. It only seeks to strengthen the use of the IRBuilder functions CreateElementCount() and CreateTypeSize(), where I want to mandate that the specified result type must be big enough to store the requested value, otherwise the result can be poison. My rationale is that it seems odd to use these functions and not care about generating the correct value.
I plan to follow @huntergr-arm's advice to document the new expectation.
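To make the expectation concrete, here is a purely hypothetical defensive wrapper (not code from this PR, and the assertion policy is an assumption); the individual calls (isUIntN, getKnownMinValue, getBitWidth, CreateElementCount) are existing API.

// Hypothetical wrapper illustrating the "result type must be big enough"
// expectation; not part of this patch.
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static Value *createElementCountChecked(IRBuilderBase &B, IntegerType *Ty,
                                        ElementCount EC) {
  // At minimum the known-minimum count must be representable; the vscale
  // factor itself is covered by the caller's "big enough" guarantee.
  assert(isUIntN(Ty->getBitWidth(), EC.getKnownMinValue()) &&
         "result type too narrow for requested element count");
  return B.CreateElementCount(Ty, EC);
}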
Right, but it seems like the current users don't follow the new behavior and the IR we generate for LV is now more poisonous than before?
Please can you be specific and show the code that does not follow the new expected behaviour? My assertion is that all current users already assume a non-truncated result and thus we can be sure the internal calculation does not wrap.
Yep, the current users assume it doesn't wrap, but I think it would probably be good to back that assumption with an adjustment to LangRef before relying even more on it. One way to do so would be to add a cap on the possible value of vscale: #144607
Sounds good. This doesn't need to gate this PR, does it? The LangRef change is legitimising a decision LoopVectorize has already made, which this PR does not affect.
Ok great, this doesn't need to wait for the clarification in LangRef to land, as long as it gets addressed soon.
LGTM! I'm happy with this approach - we shouldn't really be calling CreateElementCount and CreateTypeSize with types that cannot fit the requested element count/type size, since that feels more like a bug with the caller.
@@ -951,11 +951,13 @@ class IRBuilderBase {
  }

  /// Create an expression which evaluates to the number of elements in \p EC
- /// at runtime.
+ /// at runtime. This can result in poison if type \p Ty is not big enough to
How could/should the caller determine that the result fits into the type w/o restrictions on the possible range of vscale?
I'm not sure they can, but that doesn't stop all the current callers assuming the result fits. I am not aware of any existing code that wants a truncated value, with all assuming an i32 or i64 will be big enough to represent the size of a vector.
Are there any more concerns with this PR? I honestly thought this to be a non-contentious change but am happy to seek wider consensus if need be.
0f6db8e to 1ec21cb (Compare)
LGTM, thanks
This puts the onus on the caller to ensure the result type is big enough. In the unlikely event a cropped result is required, explicitly truncate a safe value.
1ec21cb to b20bbde (Compare)
This puts the onus on the caller to ensure the result type is big enough. In the unlikely event a cropped result is required, explicitly truncate a safe value.