Skip to content

[clang][llvm][SPIR-V] Explicitly encode native integer widths for SPIR-V #110695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions clang/lib/Basic/Targets/SPIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {

// SPIR-V IDs are represented with a single 32-bit word.
SizeType = TargetInfo::UnsignedInt;
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}

void getTargetDefines(const LangOptions &Opts,
Expand All @@ -338,8 +338,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V has core support for atomic ops, and Int32 is always available;
// we take the maximum because it's possible the Host supports wider types.
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
"v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}

void getTargetDefines(const LangOptions &Opts,
Expand All @@ -362,8 +362,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo {
// SPIR-V has core support for atomic ops, and Int64 is always available;
// we take the maximum because it's possible the Host supports wider types.
MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 64);
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1");
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
}

void getTargetDefines(const LangOptions &Opts,
Expand All @@ -388,8 +388,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64AMDGCNTargetInfo final
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
AddrSpaceMap = &SPIRDefIsGenMap;

resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0");
resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0");

BFloat16Width = BFloat16Align = 16;
BFloat16Format = &llvm::APFloat::BFloat();
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGen/target-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -271,4 +271,4 @@

// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -o - -emit-llvm %s | \
// RUN: FileCheck %s -check-prefix=AMDGPUSPIRV64
// AMDGPUSPIRV64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0"
// AMDGPUSPIRV64: target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n32:64-S32-G1-P4-A0"
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/builtins-amdgcn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ void test_get_workgroup_size(int d, global int *out)

// CHECK-LABEL: @test_get_grid_size(
// CHECK: {{.*}}call align 4 dereferenceable(64){{.*}} ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %.sink
// CHECK: getelementptr inbounds i8, ptr addrspace(4) %{{.*}}, i64 %{{.+}}
// CHECK: load i32, ptr addrspace(4) %{{.*}}, align 4, !invariant.load
void test_get_grid_size(int d, global int *out)
{
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ static std::string computeDataLayout(const Triple &TT) {
// memory model used for graphics: PhysicalStorageBuffer64. But it shouldn't
// mean anything.
if (Arch == Triple::spirv32)
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
"v256:256-v512:512-v1024:1024-n8:16:32:64-G1";
if (TT.getVendor() == Triple::VendorType::AMD &&
TT.getOS() == Triple::OSType::AMDHSA)
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1-P4-A0";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-"
"v96:128-v192:256-v256:256-v512:512-v1024:1024-G1";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
"v512:512-v1024:1024-n32:64-S32-G1-P4-A0";
return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
"v512:512-v1024:1024-n8:16:32:64-G1";
}

static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM) {
Expand Down
71 changes: 53 additions & 18 deletions llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
Original file line number Diff line number Diff line change
@@ -1,41 +1,76 @@
; This test aims to check ability to support "Arithmetic with Overflow" intrinsics
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what the problem is with this test, but it's already covered by another?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This relies on CodeGenPrepare::combineToUSubWithOverflow firing, however when encoding the int sizes we can do loop strength reduction before, which is preferable, but also means one no longer gets the overflow intrinsics inserted, hence the test is spurious. I think you mentioned elsewhere that it's risque to rely on specific optimisations, and since the correct lowering of the overflow intrinsics is already covered, this seems spurious, hence the removal.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is testing codegenprepare as part of the normal codegen pipeline, so this one is fine. The other case was a full optimization pipeline + codegen, which are more far removed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right but it's relying on a non-guaranteed maybe-optimisation firing, as far as I can tell.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AlexVlx I'm strongly against deleting this test case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main objection is that the code base should switch from one stable state to another, without losing current coverage, stability, etc. When any of us is adding a feature that alters translation behavior it looks fair to expect that the same contributor is responsible for updating all impacted existing components. Applying this principle to the PR, I'd rather expect that you update existing test case (if it's required by the change), but not remove it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AlexVlx I am not sure I follow the reasoning here. The idea behind those various tests in /SPIRV/optimizations/ (including this one) is to ensure the backend can accept any IR. Either coming from the standard -O3 or some custom pipeline.

I do agree that we should not expect specific optimizations (or even more sets of optimizations) to do specific things when writing a test for an independent component. Hence, after some thinking, I believe the test could be improved by already containing a specific pattern that would be coming from the optimization. However, by the same principle, we should not expect that the issue will be "optimized away" by another optimization.

I would also oppose removing existing test cases. Any improvements, strengthening, or necessary changes are (of course) desirable.

Copy link
Contributor

@arsenm arsenm Oct 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right but it's relying on a non-guaranteed maybe-optimisation firing, as far as I can tell.

The point is to test the optimization does work. The codegen pipeline is a bunch of intertwined IR passes on top of core codegen, and they need to cooperate.

Testing what the behavior is is also always important, regardless of whether the result is what you want it to be or not

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michalpaszkowski @VyacheslavLevytskyy I've restored the test, and retained the apparent intention of triggering combining in CodeGenPrepare. I do think there's a bit of an impedance mismatch in the conversation, but that might be just me: on one hand, there's nothing special about the pattern that results, and we already test for the correct generation of the intrinsics themselves. On the other, LSR does a better job here so it's not quite a case of a problem going away, but rather a more profitable optimisation becoming viable. Perhaps we could extend the test to cover this by way of, essentially, looking for the overflow intrinsics if LSR is off, and checking their absence when it's on, albeit that might introduce some added brittleness.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The codegen prepare behavior is still backend code to be tested. You can just run codegenprepare as a standalone pass too (usually would have separate llc and opt run lines in such a test)

; in the special case when those intrinsics are being generated by the CodeGenPrepare;
; pass during translations with optimization (note -O3 in llc arguments).
; pass during translations with optimization (note -disable-lsr, to inhibit
; strength reduction pre-empting with a more preferable match for this pattern
; in llc arguments).

; RUN: llc -O3 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; RUN: llc -O3 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s
; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; CHECK-DAG: OpName %[[Val:.*]] "math"
; CHECK-DAG: OpName %[[IsOver:.*]] "ov"
; RUN: llc -O3 -disable-lsr -mtriple=spirv32-unknown-unknown %s -o - | FileCheck --check-prefix=NOLSR %s
; RUN: %if spirv-tools %{ llc -O3 -disable-lsr -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; RUN: llc -O3 -disable-lsr -mtriple=spirv64-unknown-unknown %s -o - | FileCheck --check-prefix=NOLSR %s
; RUN: %if spirv-tools %{ llc -O3 -disable-lsr -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %}

; CHECK-DAG: OpName %[[PhiRes:.*]] "lsr.iv"
; CHECK-DAG: OpName %[[IsOver:.*]] "fl"
; CHECK-DAG: OpName %[[Val:.*]] "lsr.iv.next"
; CHECK-DAG: %[[Int:.*]] = OpTypeInt 32 0
; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0
; CHECK-DAG: %[[PtrChar:.*]] = OpTypePointer Generic %[[Char]]
; CHECK-DAG: %[[Bool:.*]] = OpTypeBool
; CHECK-DAG: %[[Struct:.*]] = OpTypeStruct %[[Int]] %[[Int]]
; CHECK-DAG: %[[Const1:.*]] = OpConstant %[[Int]] 1
; CHECK-DAG: %[[Zero:.*]] = OpConstant %[[Int]] 0
; CHECK-DAG: %[[Const42:.*]] = OpConstant %[[Char]] 42
; CHECK-DAG: %[[Zero:.*]] = OpConstantNull %[[Int]]

; CHECK: OpFunction
; CHECK: %[[A:.*]] = OpFunctionParameter %[[Int]]
; CHECK: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]]
; CHECK: %[[#]] = OpLabel
; CHECK: OpBranch %[[#]]
; CHECK: %[[#]] = OpLabel
; CHECK: %[[PhiRes:.*]] = OpPhi %[[Int]] %[[A]] %[[#]] %[[Val]] %[[#]]
; CHECK: %[[AggRes:.*]] = OpIAddCarry %[[Struct]] %[[PhiRes]] %[[Const1]]
; CHECK: %[[Val]] = OpCompositeExtract %[[Int]] %[[AggRes]] 0
; CHECK: %[[Over:.*]] = OpCompositeExtract %[[Int]] %[[AggRes]] 1
; CHECK: %[[IsOver]] = OpINotEqual %[[Bool:.*]] %[[Over]] %[[Zero]]
; CHECK: OpBranchConditional %[[IsOver]] %[[#]] %[[#]]
; CHECK: OpStore %[[Ptr]] %[[Const42]] Aligned 1
; CHECK: %[[APlusOne:.*]] = OpIAdd %[[Int]] %[[A]] %[[Const1]]
; CHECK: OpBranch %[[#]]
; CHECK: [[#]] = OpLabel
; CHECK: %[[PhiRes]] = OpPhi %[[Int]] %[[Val]] %[[#]] %[[APlusOne]] %[[#]]
; CHECK: %[[IsOver]] = OpIEqual %[[Bool]] %[[#]] %[[#]]
; CHECK: OpBranchConditional %[[IsOver]] %[[#]] %[[#]]
; CHECK: [[#]] = OpLabel
; CHECK: OpStore %[[Ptr]] %[[Const42]] Aligned 1
; CHECK: [[Val]] = OpIAdd %[[Int]] %[[PhiRes]] %[[Const1]]
; CHECK: OpBranch %[[#]]
; CHECK: %[[#]] = OpLabel
; CHECK: OpReturnValue %[[Val]]
; CHECK: OpFunctionEnd
; CHECK: [[#]] = OpLabel
; OpReturnValue %[[PhiRes]]

; NOLSR-DAG: OpName %[[Val:.*]] "math"
; NOLSR-DAG: OpName %[[IsOver:.*]] "ov"
; NOLSR-DAG: %[[Int:.*]] = OpTypeInt 32 0
; NOLSR-DAG: %[[Char:.*]] = OpTypeInt 8 0
; NOLSR-DAG: %[[PtrChar:.*]] = OpTypePointer Generic %[[Char]]
; NOLSR-DAG: %[[Bool:.*]] = OpTypeBool
; NOLSR-DAG: %[[Struct:.*]] = OpTypeStruct %[[Int]] %[[Int]]
; NOLSR-DAG: %[[Const1:.*]] = OpConstant %[[Int]] 1
; NOLSR-DAG: %[[Const42:.*]] = OpConstant %[[Char]] 42
; NOLSR-DAG: %[[Zero:.*]] = OpConstantNull %[[Int]]

; NOLSR: OpFunction
; NOLSR: %[[A:.*]] = OpFunctionParameter %[[Int]]
; NOLSR: %[[Ptr:.*]] = OpFunctionParameter %[[PtrChar]]
; NOLSR: %[[#]] = OpLabel
; NOLSR: OpBranch %[[#]]
; NOLSR: %[[#]] = OpLabel
; NOLSR: %[[PhiRes:.*]] = OpPhi %[[Int]] %[[A]] %[[#]] %[[Val]] %[[#]]
; NOLSR: %[[AggRes:.*]] = OpIAddCarry %[[Struct]] %[[PhiRes]] %[[Const1]]
; NOLSR: %[[Val]] = OpCompositeExtract %[[Int]] %[[AggRes]] 0
; NOLSR: %[[Over:.*]] = OpCompositeExtract %[[Int]] %[[AggRes]] 1
; NOLSR: %[[IsOver]] = OpINotEqual %[[Bool:.*]] %[[Over]] %[[Zero]]
; NOLSR: OpBranchConditional %[[IsOver]] %[[#]] %[[#]]
; NOLSR: OpStore %[[Ptr]] %[[Const42]] Aligned 1
; NOLSR: OpBranch %[[#]]
; NOLSR: %[[#]] = OpLabel
; NOLSR: OpReturnValue %[[Val]]
; NOLSR: OpFunctionEnd

define spir_func i32 @foo(i32 %a, ptr addrspace(4) %p) {
entry:
Expand Down
Loading