-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[clang][llvm][SPIR-V] Explicitly encode native integer widths for SPIR-V #110695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang Author: Alex Voicu (AlexVlx) Changes: SPIR-V doesn't currently encode "native" integer bit-widths in its datalayout(s). This is problematic as it leads to optimisation passes, such as InstCombine, getting ideas and e.g. shrinking to non byte-multiple integer types, which is not desirable and can lead to breakage further down in the toolchain. This patch addresses that by encoding `i8`, `i16`, `i32` and `i64` as native types for vanilla SPIR-V (the spec natively supports them), and `i32` and `i64` for AMDGCNSPIRV (where the hardware targets are known). We also set the stack alignment on the latter, as it is overaligned (32-bit vs 8-bit). As part of the update, we also delete a test that was fragile and depended on very specific CodeGenPrepare behaviour, around capability (integer ops with overflow intrinsics) that already has coverage. Full diff: https://github.com/llvm/llvm-project/pull/110695.diff 5 Files Affected:
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index cc79562de2871e..09d4ad3c0ac620 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -314,8 +314,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V IDs are represented with a single 32-bit word.
SizeType = TargetInfo::UnsignedInt;
- resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
void getTargetDefines(const LangOptions &Opts,
@@ -338,8 +338,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V has core support for atomic ops, and Int32 is always available;
// we take the maximum because it's possible the Host supports wider types.
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
- resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+ resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
+ "v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
void getTargetDefines(const LangOptions &Opts,
@@ -362,8 +362,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V has core support for atomic ops, and Int64 is always available;
// we take the maximum because it's possible the Host supports wider types.
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
- resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
void getTargetDefines(const LangOptions &Opts,
@@ -388,8 +388,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
AddrSpaceMap = &SPIRDefIsGenMap;
- resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0");
BFloat16Width = BFloat16Align = 16;
BFloat16Format = &llvm::APFloat::BFloat();
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 8548aa00cfe877..fa875fe68b0c5b 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -271,4 +271,4 @@
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=AMDGPUSPIRV64
-// AMDGPUSPIRV64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"
+// AMDGPUSPIRV64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index bf5f2971cf118c..9132cc8a717e0f 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -638,7 +638,7 @@ void test_get_workgroup_size(int d, global int *out)
// CHECK-LABEL: @test_get_grid_size(
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %.sink
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !invariant.load
void test_get_grid_size(int d, global int *out)
{
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index e5384b2eb2c2c1..50c881a19cf58b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -54,14 +54,14 @@ static std::string computeDataLayout(const Triple &TT) {
// memory model used for graphics: PhysicalStorageBuffer64. But it shouldn't
// mean anything.
if (Arch == Triple::spirv32)
- return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+ return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n8:16:32:64-G1";
if (TT.getVendor() == Triple::VendorType::AMD &&
TT.getOS() == Triple::OSType::AMDHSA)
- return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0";
- return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+ return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
+ "v512:512-v1024:1024-n32:64-S32-G1-P4-A0";
+ return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
+ "v512:512-v1024:1024-n8:16:32:64-G1";
}
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
diff --git a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll b/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
deleted file mode 100644
index 1a630f77a44c5d..00000000000000
--- a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; This test aims to check ability to support "Arithmetic with Overflow" intrinsics
-; in the special case when those intrinsics are being generated by the CodeGenPrepare;
-; pass during translations with optimization (note -O3 in llc arguments).
-
-; RUN: llc -O3 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; RUN: llc -O3 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; CHECK-DAG: OpName %[[Val:.*]] "math"
-; CHECK-DAG: OpName %[[IsOver:.*]] "ov"
-; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0
-; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0
-; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Generic %[[Char]]
-; CHECK-DAG: %[[Bool:.*]] = OpTypeBool
-; CHECK-DAG: %[[Struct:.*]] = OpTypeStruct %[[Int]] %[[Int]]
-; CHECK-DAG: %[[Const1:.*]] = OpConstant %[[Int]] 1
-; CHECK-DAG: %[[Const42:.*]] = OpConstant %[[Char]] 42
-; CHECK-DAG: %[[Zero:.*]] = OpConstantNull %[[Int]]
-
-; CHECK: OpFunction
-; CHECK: %[[A:.*]] = OpFunctionParameter %[[Int]]
-; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]]
-; CHECK: %[[#]] = OpLabel
-; CHECK: OpBranch %[[#]]
-; CHECK: %[[#]] = OpLabel
-; CHECK: %[[PhiRes:.*]] = OpPhi %[[Int]] %[[A]] %[[#]] %[[Val]] %[[#]]
-; CHECK: %[[AggRes:.*]] = OpIAddCarry %[[Struct]] %[[PhiRes]] %[[Const1]]
-; CHECK: %[[Val]] = OpCompositeExtract %[[Int]] %[[AggRes]] 0
-; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Int]] %[[AggRes]] 1
-; CHECK: %[[IsOver]] = OpINotEqual %[[Bool:.*]] %[[Over]] %[[Zero]]
-; CHECK: OpBranchConditional %[[IsOver]] %[[#]] %[[#]]
-; CHECK: OpStore %[[Ptr]] %[[Const42]] Aligned 1
-; CHECK: OpBranch %[[#]]
-; CHECK: %[[#]] = OpLabel
-; CHECK: OpReturnValue %[[Val]]
-; CHECK: OpFunctionEnd
-
-define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) {
-entry:
- br label %l1
-
-body:
- store i8 42, ptr addrspace(4) %p
- br label %l1
-
-l1:
- %e = phi i32 [ %a, %entry ], [ %i, %body ]
- %i = add nsw i32 %e, 1
- %fl = icmp eq i32 %i, 0
- br i1 %fl, label %exit, label %body
-
-exit:
- ret i32 %i
-}
|
@llvm/pr-subscribers-backend-spir-v Author: Alex Voicu (AlexVlx) Changes: SPIR-V doesn't currently encode "native" integer bit-widths in its datalayout(s). This is problematic as it leads to optimisation passes, such as InstCombine, getting ideas and e.g. shrinking to non byte-multiple integer types, which is not desirable and can lead to breakage further down in the toolchain. This patch addresses that by encoding `i8`, `i16`, `i32` and `i64` as native types for vanilla SPIR-V (the spec natively supports them), and `i32` and `i64` for AMDGCNSPIRV (where the hardware targets are known). We also set the stack alignment on the latter, as it is overaligned (32-bit vs 8-bit). As part of the update, we also delete a test that was fragile and depended on very specific CodeGenPrepare behaviour, around capability (integer ops with overflow intrinsics) that already has coverage. Full diff: https://github.com/llvm/llvm-project/pull/110695.diff 5 Files Affected:
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index cc79562de2871e..09d4ad3c0ac620 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -314,8 +314,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V IDs are represented with a single 32-bit word.
SizeType = TargetInfo::UnsignedInt;
- resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
void getTargetDefines(const LangOptions &Opts,
@@ -338,8 +338,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V has core support for atomic ops, and Int32 is always available;
// we take the maximum because it's possible the Host supports wider types.
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
- resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+ resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
+ "v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
void getTargetDefines(const LangOptions &Opts,
@@ -362,8 +362,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V has core support for atomic ops, and Int64 is always available;
// we take the maximum because it's possible the Host supports wider types.
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
- resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}
void getTargetDefines(const LangOptions &Opts,
@@ -388,8 +388,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
AddrSpaceMap = &SPIRDefIsGenMap;
- resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
+ resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0");
BFloat16Width = BFloat16Align = 16;
BFloat16Format = &llvm::APFloat::BFloat();
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index 8548aa00cfe877..fa875fe68b0c5b 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -271,4 +271,4 @@
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=AMDGPUSPIRV64
-// AMDGPUSPIRV64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"
+// AMDGPUSPIRV64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index bf5f2971cf118c..9132cc8a717e0f 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -638,7 +638,7 @@ void test_get_workgroup_size(int d, global int *out)
// CHECK-LABEL: @test_get_grid_size(
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %.sink
+// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !invariant.load
void test_get_grid_size(int d, global int *out)
{
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index e5384b2eb2c2c1..50c881a19cf58b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -54,14 +54,14 @@ static std::string computeDataLayout(const Triple &TT) {
// memory model used for graphics: PhysicalStorageBuffer64. But it shouldn't
// mean anything.
if (Arch == Triple::spirv32)
- return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+ return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
+ "v256:256-v512:512-v1024:1024-n8:16:32:64-G1";
if (TT.getVendor() == Triple::VendorType::AMD &&
TT.getOS() == Triple::OSType::AMDHSA)
- return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0";
- return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
- "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
+ return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
+ "v512:512-v1024:1024-n32:64-S32-G1-P4-A0";
+ return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
+ "v512:512-v1024:1024-n8:16:32:64-G1";
}
static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
diff --git a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll b/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
deleted file mode 100644
index 1a630f77a44c5d..00000000000000
--- a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; This test aims to check ability to support "Arithmetic with Overflow" intrinsics
-; in the special case when those intrinsics are being generated by the CodeGenPrepare;
-; pass during translations with optimization (note -O3 in llc arguments).
-
-; RUN: llc -O3 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; RUN: llc -O3 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
-; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}
-
-; CHECK-DAG: OpName %[[Val:.*]] "math"
-; CHECK-DAG: OpName %[[IsOver:.*]] "ov"
-; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0
-; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0
-; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Generic %[[Char]]
-; CHECK-DAG: %[[Bool:.*]] = OpTypeBool
-; CHECK-DAG: %[[Struct:.*]] = OpTypeStruct %[[Int]] %[[Int]]
-; CHECK-DAG: %[[Const1:.*]] = OpConstant %[[Int]] 1
-; CHECK-DAG: %[[Const42:.*]] = OpConstant %[[Char]] 42
-; CHECK-DAG: %[[Zero:.*]] = OpConstantNull %[[Int]]
-
-; CHECK: OpFunction
-; CHECK: %[[A:.*]] = OpFunctionParameter %[[Int]]
-; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]]
-; CHECK: %[[#]] = OpLabel
-; CHECK: OpBranch %[[#]]
-; CHECK: %[[#]] = OpLabel
-; CHECK: %[[PhiRes:.*]] = OpPhi %[[Int]] %[[A]] %[[#]] %[[Val]] %[[#]]
-; CHECK: %[[AggRes:.*]] = OpIAddCarry %[[Struct]] %[[PhiRes]] %[[Const1]]
-; CHECK: %[[Val]] = OpCompositeExtract %[[Int]] %[[AggRes]] 0
-; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Int]] %[[AggRes]] 1
-; CHECK: %[[IsOver]] = OpINotEqual %[[Bool:.*]] %[[Over]] %[[Zero]]
-; CHECK: OpBranchConditional %[[IsOver]] %[[#]] %[[#]]
-; CHECK: OpStore %[[Ptr]] %[[Const42]] Aligned 1
-; CHECK: OpBranch %[[#]]
-; CHECK: %[[#]] = OpLabel
-; CHECK: OpReturnValue %[[Val]]
-; CHECK: OpFunctionEnd
-
-define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) {
-entry:
- br label %l1
-
-body:
- store i8 42, ptr addrspace(4) %p
- br label %l1
-
-l1:
- %e = phi i32 [ %a, %entry ], [ %i, %body ]
- %i = add nsw i32 %e, 1
- %fl = icmp eq i32 %i, 0
- br i1 %fl, label %exit, label %body
-
-exit:
- ret i32 %i
-}
|
@@ -1,56 +0,0 @@ | |||
; This test aims to check ability to support "Arithmetic with Overflow" intrinsics |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not sure what the problem is with this test, but it's already covered by another?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This relies on CodeGenPrepare::combineToUSubWithOverflow
firing, however when encoding the int sizes we can do loop strength reduction before, which is preferable, but also means one no longer gets the overflow intrinsics inserted, hence the test is spurious. I think you mentioned elsewhere that it's risque to rely on specific optimisations, and since the correct lowering of the overflow intrinsics is already covered, this seems spurious, hence the removal.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one is testing codegenprepare as part of the normal codegen pipeline, so this one is fine. The other case was a full optimization pipeline + codegen, which are more far removed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right but it's relying on a non-guaranteed maybe-optimisation firing, as far as I can tell.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@AlexVlx I'm strongly against deleting this test case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The main objection is that the code base should switch from one stable state to another, without losing current coverage, stability, etc. When any of us is adding a feature that alters translation behavior it looks fair to expect that the same contributor is responsible for updating all impacted existing components. Applying this principle to the PR, I'd rather expect that you update existing test case (if it's required by the change), but not remove it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@AlexVlx I am not sure I follow the reasoning here. The idea behind those various tests in /SPIRV/optimizations/
(including this one) is to ensure the backend can accept any IR. Either coming from the standard -O3 or some custom pipeline.
I do agree that we should not expect specific optimizations (or even more sets of optimizations) to do specific things when writing a test for an independent component. Hence, after some thinking, I believe the test could be improved by already containing a specific pattern that would be coming from the optimization. However, by the same principle, we should not expect that the issue will be "optimized away" by another optimization.
I would also oppose removing existing test cases. Any improvements, strengthening, or necessary changes are (of course) desirable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right but it's relying on a non-guaranteed maybe-optimisation firing, as far as I can tell.
The point is to test the optimization does work. The codegen pipeline is a bunch of intertwined IR passes on top of core codegen, and they need to cooperate.
Testing what the behavior is is also always important, regardless of whether the result is what you want it to be or not
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@michalpaszkowski @VyacheslavLevytskyy I've restored the test, and retained the apparent intention of triggering combining in CodeGenPrepare. I do think there's a bit of an impedance mismatch in the conversation, but that might be just me: on one hand, there's nothing special about the pattern that results, and we already test for the correct generation of the intrinsics themselves. On the other, LSR does a better job here so it's not quite a case of a problem going away, but rather a more profitable optimisation becoming viable. Perhaps we could extend the test to cover this by way of, essentially, looking for the overflow intrinsics if LSR is off, and checking their absence when it's on, albeit that might introduce some added brittleness.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The codegen prepare behavior is still backend code to be tested. You can just run codegenprepare as a standalone pass too (usually would have separate llc and opt run lines in such a test)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, it should make LLVM IR after optimizations more translatable in SPIR-V! Few questions though:
- Usually (or at least AFAIK) optimization passes won't consider datalayout automatically, as LLVM defines datalayout not as a contract set by the frontend, but a contract that the code generator expects. Do you plan to go over LLVM passes adding this check?
- Some existing and future extensions might allow extra bit widths for integers. For example here is SPV_INTEL_arbitrary_precision_integers extension that allows any bit widths for integers (it's actually a bad example as it's developed only for _BitInt C23 extension and FPGA hardware, so datalayout wouldn't have an impact on it) or one of the internally discussed within Khronos extensions for ML (which might be impacted by this change). Can we envision, how can we change datalayout information depending on the enabled extensions (or you don't think it's a big problem?)
The datalayout is a widely used global constant. There's no option of "not considering it"
There's nothing new to do here. This has always existed
This does not mean arbitrary integer bitwidths do not work. The n field is weird, it's more of an optimization hint. |
@arsenm here is a small experiment, I've compiled the following OpenCL code:
with the PR pulled in (on top of LLVM's HEAD aadfba9), the compilation command is:
note bitcast to i128 with the following truncation to i96 - those types aren't part of the datalayout, yet some optimization generated them. So something has to be done with it and changing the datalayout is not enough.
Let me clarify myself, _BitInt(N) will work with the change, I have no doubts. But I can imagine a SPIR-V extension to appear that would add support for 4-bit integers. And I can imagine that we would want to not only be able to emit 4-bit integers in the frontend, but also allow optimization passes to emit them. For this it would be nice to have a mechanism that would change datalayout depending on --spirv-ext (or other option). |
You want spirv, not spir
Any pass is allowed to introduce any IR type. This field is a pure optimization hint. It is not required to do anything, and places no restrictions on any pass
Just because there's an extension doesn't mean it's desirable to use them. On real targets, they'll end up codegenning in wider types anyway |
Thanks! Yet the result is the same (for spirv64, just spirv target compilation crashes).
So guess answer to my question would be: "no" :) |
IMHO, whilst it is mechanically possible, we should not make DataLayout mutable subject to things like the presence or absence of an extension, that seems like a recipe for subtle pain. It seems preferable (to me) to just have another triple for targets where arbitrary (and possibly weird) bitwidths are native i.e. desirable / optimal. I will note that 4-bit ints are still somewhat natural (we've had nibbles, after all). What motivated this change was the weirder types that obtain without conveying native int widths ( In what regards the example itself, unfortunately (fortunately?), that's an actual bug which is relatively orthogonal to this particular PR, it fails today with vanilla upstream, please see: https://godbolt.org/z/7aeP797PP. The wide integer types come from GVN, as part of its Load handling, see |
…_layout_with_int_sz
You mean the translator, right? I don't think that SPIR-V backend should follow its practices especially when we could do better.
So we both agree, that the compiler must compile the OpenCL code from above to SPIR-V without erroring out. We can do it in 2 ways:
|
Well, the issue the example illustrates is pre-existing and not influenced / triggered by this patch; stepping back, if the question is “are you going to try / would you like to fix this other issue” the answer is yes, but sadly I don't have an immediate intuition as to the right solution (1 is
Don't get me wrong, what I'm saying is not an objection against the patch, but rather an attempt to test the waters and gather what plans/ideas you have :) I can say, that in https://github.com/intel/llvm/ we do have some customization that prevents certain optimizations for SPIR target and was wondering if it's a 'community standard' and if we can align on how InstCombine and other passes should behave when we compile to SPIR-V (if they should, if I read the comments above correctly, Matt has a different opinion). |
InstCombine's primary function is a canonicalization pass. You shouldn't be modifying it for specifically SPIRV optimizations (with the exception of SPIRV intrinsic support). SPIRV specific transforms belong in later backend IR passes |
Does it mean, that the reasoning behind this very PR is not legit?
|
This is not a SPIR-V specific optimisation being added to InstCombine. It addresses a SPIR-V issue (lack of information) on the SPIR-V side, not in InstCombine. |
No. This is providing the generic property in the datalayout used by InstCombine and others as a hint of what to do without directly knowing what the target is |
…_layout_with_int_sz
…_layout_with_int_sz
…_layout_with_int_sz
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, sorry for the delay
…R-V (llvm#110695) SPIR-V doesn't currently encode "native" integer bit-widths in its datalayout(s). This is problematic as it leads to optimisation passes, such as InstCombine, getting ideas and e.g. shrinking to non byte-multiple integer types, which is not desirable and can lead to breakage further down in the toolchain. This patch addresses that by encoding `i8`, `i16`, `i32` and `i64` as native types for vanilla SPIR-V (the spec natively supports them), and `i32` and `i64` for AMDGCNSPIRV (where the hardware targets are known). We also set the stack alignment on the latter, as it is overaligned (32-bit vs 8-bit).
Adds the following patches AMDGPU: Remove wavefrontsize64 feature from dummy target llvm#117410 [LLVM][NFC] Use used's element type if available llvm#116804 [llvm][AMDGPU] Fold llvm.amdgcn.wavefrontsize early llvm#114481 [clang][Driver][HIP] Add support for mixing AMDGCNSPIRV & concrete offload-archs. llvm#113509 [clang][llvm][SPIR-V] Explicitly encode native integer widths for SPIR-V llvm#110695 [llvm][opt][Transforms] Replacement calloc should match replaced malloc llvm#110524 [clang][HIP] Don't use the OpenCLKernel CC when targeting AMDGCNSPIRV llvm#110447 [cuda][HIP] constant should imply constant llvm#110182 [llvm][SPIRV] Expose fast popcnt support for SPIR-V targets llvm#109845 [clang][CodeGen][SPIR-V] Fix incorrect SYCL usage, implement missing interface llvm#109415 [SPIRV][RFC] Rework / extend support for memory scopes llvm#106429 [clang][CodeGen][SPIR-V][AMDGPU] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. llvm#102776 Change-Id: I2b9ab54aba1c9345b9b0eb84409e6ed6c3cdb6cd
SPIR-V doesn't currently encode "native" integer bit-widths in its datalayout(s). This is problematic as it leads to optimisation passes, such as InstCombine, getting ideas and e.g. shrinking to non byte-multiple integer types, which is not desirable and can lead to breakage further down in the toolchain. This patch addresses that by encoding `i8`, `i16`, `i32` and `i64` as native types for vanilla SPIR-V (the spec natively supports them), and `i32` and `i64` for AMDGCNSPIRV (where the hardware targets are known). We also set the stack alignment on the latter, as it is overaligned (32-bit vs 8-bit). As part of the update, we also delete a test that was fragile and depended on very specific CodeGenPrepare behaviour, around capability (integer ops with overflow intrinsics) that already has coverage.