Skip to content

Commit 830ed64

Browse files
committed
Revert "Revert "[AMDGPU] Reorganize GCN subtarget features for unaligned access""
This reverts commit 8b08fa0. The underlying problems were fixed by D90607.
1 parent f6a326a commit 830ed64

16 files changed

+79
-62
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
9090
def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
9191
"UnalignedBufferAccess",
9292
"true",
93-
"Support unaligned global loads and stores"
93+
"Hardware supports unaligned global loads and stores"
9494
>;
9595

9696
def FeatureTrapHandler: SubtargetFeature<"trap-handler",
@@ -105,18 +105,10 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
105105
"Support unaligned scratch loads and stores"
106106
>;
107107

108-
// LDS alignment enforcement is controlled by a configuration register:
109-
// SH_MEM_CONFIG.alignment_mode
110-
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
111-
"UnalignedAccessMode",
112-
"true",
113-
"Support unaligned local and region loads and stores"
114-
>;
115-
116108
def FeatureUnalignedDSAccess : SubtargetFeature<"unaligned-ds-access",
117109
"UnalignedDSAccess",
118110
"true",
119-
"Does not requires 16 byte alignment for certain local and region loads and stores"
111+
"Hardware supports unaligned local and region loads and stores"
120112
>;
121113

122114
def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
@@ -658,6 +650,15 @@ def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range",
658650
"Requires use of fract on arguments to trig instructions"
659651
>;
660652

653+
// Alignment enforcement is controlled by a configuration register:
654+
// SH_MEM_CONFIG.alignment_mode
655+
def FeatureUnalignedAccessMode : SubtargetFeature<"unaligned-access-mode",
656+
"UnalignedAccessMode",
657+
"true",
658+
"Enable unaligned global, local and region loads and stores if the hardware"
659+
" supports it"
660+
>;
661+
661662
// Dummy feature used to disable assembler instructions.
662663
def FeatureDisable : SubtargetFeature<"",
663664
"FeatureDisable","true",
@@ -684,7 +685,8 @@ def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
684685
FeatureWavefrontSize64, FeatureFlatAddressSpace,
685686
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
686687
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
687-
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC]
688+
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC,
689+
FeatureUnalignedBufferAccess]
688690
>;
689691

690692
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -697,7 +699,8 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
697699
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
698700
FeatureIntClamp, FeatureTrigReducedRange, FeatureGFX8Insts,
699701
FeatureGFX7GFX8GFX9Insts, FeatureSMemTimeInst, FeatureMadMacF32Insts,
700-
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32
702+
FeatureDsSrc2Insts, FeatureDoesNotSupportSRAMECC, FeatureFastDenormalF32,
703+
FeatureUnalignedBufferAccess
701704
]
702705
>;
703706

@@ -714,7 +717,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
714717
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
715718
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
716719
FeatureSMemTimeInst, FeatureMadMacF32Insts, FeatureDsSrc2Insts,
717-
FeatureFastDenormalF32, FeatureUnalignedDSAccess
720+
FeatureFastDenormalF32, FeatureUnalignedBufferAccess,
721+
FeatureUnalignedDSAccess
718722
]
719723
>;
720724

@@ -733,7 +737,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
733737
FeatureVOP3Literal, FeatureDPP8,
734738
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
735739
FeatureGFX10A16, FeatureFastDenormalF32, FeatureG16,
736-
FeatureUnalignedDSAccess
740+
FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess
737741
]
738742
>;
739743

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,9 +1068,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
10681068
return false;
10691069
};
10701070

1071-
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccess() ? 0 : 32;
1072-
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccess() ? 0 : 16;
1073-
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccess() ? 0 : 8;
1071+
unsigned GlobalAlign32 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 32;
1072+
unsigned GlobalAlign16 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 16;
1073+
unsigned GlobalAlign8 = ST.hasUnalignedBufferAccessEnabled() ? 0 : 8;
10741074

10751075
// TODO: Refine based on subtargets which support unaligned access or 128-bit
10761076
// LDS

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
9090
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
9191

9292
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
93-
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
93+
FullFS += "+flat-for-global,+unaligned-access-mode,+trap-handler,";
9494

9595
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
9696

@@ -194,7 +194,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
194194
FlatForGlobal(false),
195195
AutoWaitcntBeforeBarrier(false),
196196
UnalignedScratchAccess(false),
197-
UnalignedBufferAccess(false),
198197
UnalignedAccessMode(false),
199198

200199
HasApertureRegs(false),
@@ -266,6 +265,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
266265
HasUnpackedD16VMem(false),
267266
LDSMisalignedBug(false),
268267
HasMFMAInlineLiteralBug(false),
268+
UnalignedBufferAccess(false),
269269
UnalignedDSAccess(false),
270270

271271
ScalarizeGlobal(false),

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
317317
bool FlatForGlobal;
318318
bool AutoWaitcntBeforeBarrier;
319319
bool UnalignedScratchAccess;
320-
bool UnalignedBufferAccess;
321320
bool UnalignedAccessMode;
322321
bool HasApertureRegs;
323322
bool EnableXNACK;
@@ -398,6 +397,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
398397
bool HasMFMAInlineLiteralBug;
399398
bool HasVertexCache;
400399
short TexVTXClauseSize;
400+
bool UnalignedBufferAccess;
401401
bool UnalignedDSAccess;
402402
bool ScalarizeGlobal;
403403

@@ -702,6 +702,18 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
702702
return UnalignedBufferAccess;
703703
}
704704

705+
bool hasUnalignedBufferAccessEnabled() const {
706+
return UnalignedBufferAccess && UnalignedAccessMode;
707+
}
708+
709+
bool hasUnalignedDSAccess() const {
710+
return UnalignedDSAccess;
711+
}
712+
713+
bool hasUnalignedDSAccessEnabled() const {
714+
return UnalignedDSAccess && UnalignedAccessMode;
715+
}
716+
705717
bool hasUnalignedScratchAccess() const {
706718
return UnalignedScratchAccess;
707719
}
@@ -710,10 +722,6 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
710722
return UnalignedAccessMode;
711723
}
712724

713-
bool hasUnalignedDSAccess() const {
714-
return UnalignedDSAccess;
715-
}
716-
717725
bool hasApertureRegs() const {
718726
return HasApertureRegs;
719727
}

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
8888
AMDGPU::FeatureEnableUnsafeDSOffsetFolding,
8989
AMDGPU::FeatureFlatForGlobal,
9090
AMDGPU::FeaturePromoteAlloca,
91-
AMDGPU::FeatureUnalignedBufferAccess,
9291
AMDGPU::FeatureUnalignedScratchAccess,
9392
AMDGPU::FeatureUnalignedAccessMode,
9493

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,8 +1433,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
14331433
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
14341434
// Check if alignment requirements for ds_read/write instructions are
14351435
// disabled.
1436-
if (Subtarget->hasUnalignedDSAccess() &&
1437-
Subtarget->hasUnalignedAccessMode() &&
1436+
if (Subtarget->hasUnalignedDSAccessEnabled() &&
14381437
!Subtarget->hasLDSMisalignedBug()) {
14391438
if (IsFast)
14401439
*IsFast = Alignment != Align(2);
@@ -1484,7 +1483,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
14841483
return AlignedBy4;
14851484
}
14861485

1487-
if (Subtarget->hasUnalignedBufferAccess() &&
1486+
if (Subtarget->hasUnalignedBufferAccessEnabled() &&
14881487
!(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
14891488
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
14901489
// If we have an uniform constant load, it still requires using a slow

llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
3-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
4-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
5-
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
2+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s
4+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s
5+
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s
66

77
; FIXME:
88
; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s

llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
2-
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
2+
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE %s
33
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -march=amdgcn < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
4-
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
5-
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
6-
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
7-
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-buffer-access < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
4+
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA %s
5+
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -disable-promote-alloca-to-vector -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
6+
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=+promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-PROMOTE-VECT -check-prefix=SI -check-prefix=FUNC %s
7+
; RUN: llc -show-mc-encoding --amdhsa-code-object-version=2 -mattr=-promote-alloca -amdgpu-load-store-vectorizer=0 -enable-amdgpu-aa=0 -verify-machineinstrs -mtriple=amdgcn-amdhsa -march=amdgcn -mcpu=tonga -mattr=-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
88

99
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=HSAOPT -check-prefix=OPT %s
1010
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -data-layout=A5 -mcpu=kaveri -amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -enable-var-scope -check-prefix=NOHSAOPT -check-prefix=OPT %s

llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX900 %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -amdgpu-enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
44

55
define <2 x half> @chain_hi_to_lo_private() {
66
; GFX900-LABEL: chain_hi_to_lo_private:

llvm/test/CodeGen/AMDGPU/fast-unaligned-load-store.global.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
4-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-buffer-access < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-ALIGNED %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7-UNALIGNED %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9 %s
55

66
; Should not merge this to a dword load
77
define i32 @global_load_2xi16_align2(i16 addrspace(1)* %p) #0 {

llvm/test/CodeGen/AMDGPU/unaligned-load-store.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
2-
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
2+
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-access-mode -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
33
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
44

55
; SI-LABEL: {{^}}local_unaligned_load_store_i16:

llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline-ir.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
2-
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
2+
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
33

44
# FIXME: This overrides attributes that already are present. It should probably
55
# only touch functions without an existing attribute.
@@ -10,8 +10,8 @@
1010
# MCPU: attributes #0 = { "target-cpu"="fiji" }
1111
# MCPU: attributes #1 = { "target-cpu"="hawaii" }
1212

13-
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-buffer-access" }
14-
# MATTR: attributes #1 = { "target-features"="+unaligned-buffer-access" }
13+
# MATTR: attributes #0 = { "target-cpu"="fiji" "target-features"="+unaligned-access-mode" }
14+
# MATTR: attributes #1 = { "target-features"="+unaligned-access-mode" }
1515

1616
--- |
1717
define amdgpu_kernel void @with_cpu_attr() #0 {

llvm/test/CodeGen/MIR/AMDGPU/llc-target-cpu-attr-from-cmdline.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=none -o - %s | FileCheck -check-prefix=MCPU %s
2-
# RUN: llc -march=amdgcn -mattr=+unaligned-buffer-access -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
2+
# RUN: llc -march=amdgcn -mattr=+unaligned-access-mode -run-pass=none -o - %s | FileCheck -check-prefix=MATTR %s
33

44
# The command line arguments for -mcpu and -mattr should manifest themselves by adding the corresponding attributes to the stub IR function.
55

66
# MCPU: attributes #0 = { "target-cpu"="hawaii" }
7-
# MATTR: attributes #0 = { "target-features"="+unaligned-buffer-access" }
7+
# MATTR: attributes #0 = { "target-features"="+unaligned-access-mode" }
88

99
---
1010
name: no_ir

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
2-
; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
3-
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
4-
; RUN: opt -S -passes='function(load-store-vectorizer)' -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
1+
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
2+
; RUN: opt -S -load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
3+
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
4+
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
55

66
target triple = "amdgcn--"
77
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
1+
; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=hawaii -load-store-vectorizer -S -o - %s | FileCheck %s
22
; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions
33

44
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

0 commit comments

Comments
 (0)