Skip to content

[AMDGPU] Handle nontemporal and amdgpu.last.use metadata in amdgpu-lower-buffer-fat-pointers #120139

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 14, 2025

Conversation

Acim-Maravic
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Dec 16, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Acim Maravic (Acim-Maravic)

Changes

Patch is 20.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/120139.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp (+4)
  • (modified) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memops.ll (+5-5)
  • (added) llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memory-metadata.ll (+383)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index c7cdd7a37282c7..14e814f64ad776 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1088,6 +1088,10 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
     Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0);
   if (IsVolatile)
     Aux |= AMDGPU::CPol::VOLATILE;
+  if (I->hasMetadata("amdgpu.last.use"))
+    Aux |= AMDGPU::CPol::TH_LU;
+  if (I->hasMetadata("nontemporal"))
+    Aux |= AMDGPU::CPol::TH_NT;
   Args.push_back(IRB.getInt32(Aux));
 
   Intrinsic::ID IID = Intrinsic::not_intrinsic;
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memops.ll
index 57028a0f9b14f3..9e72470e37db43 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memops.ll
@@ -11,11 +11,11 @@ define void @loads(ptr addrspace(8) %buf) {
 ; CHECK-NEXT:    [[SCALAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
 ; CHECK-NEXT:    [[VEC2:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0)
 ; CHECK-NEXT:    [[VEC4:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
-; CHECK-NEXT:    [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0:![0-9]+]]
+; CHECK-NEXT:    [[NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 3), !nontemporal [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1:![0-9]+]]
-; CHECK-NEXT:    [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0), !invariant.load [[META1]], !nontemporal [[META0]]
+; CHECK-NEXT:    [[NONTEMPORAL_INVARIANT:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 1), !invariant.load [[META1]], !nontemporal [[META0]]
 ; CHECK-NEXT:    [[VOLATILE:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
-; CHECK-NEXT:    [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
+; CHECK-NEXT:    [[VOLATILE_NONTEMPORAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483645), !nontemporal [[META0]]
 ; CHECK-NEXT:    fence syncscope("wavefront") release
 ; CHECK-NEXT:    [[ATOMIC:%.*]] = call float @llvm.amdgcn.raw.ptr.atomic.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
 ; CHECK-NEXT:    fence syncscope("wavefront") acquire
@@ -50,9 +50,9 @@ define void @stores(ptr addrspace(8) %buf, float %f, <4 x float> %f4) {
 ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], float [[F:%.*]], <4 x float> [[F4:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 0)
 ; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[F4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0)
-; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 2), !nontemporal [[META0]]
+; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 3), !nontemporal [[META0]]
 ; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483648)
-; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483646), !nontemporal [[META0]]
+; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483645), !nontemporal [[META0]]
 ; CHECK-NEXT:    fence syncscope("wavefront") release
 ; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[F]], ptr addrspace(8) align 4 [[BUF]], i32 16, i32 0, i32 -2147483647)
 ; CHECK-NEXT:    fence syncscope("wavefront") acquire
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memory-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memory-metadata.ll
new file mode 100644
index 00000000000000..37e6e98a365725
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-memory-metadata.ll
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s
+
+define amdgpu_kernel void @buffer_last_use_load_0(ptr addrspace(7) %in, ptr addrspace(7) %out) {
+; GFX12-LABEL: buffer_last_use_load_0:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
+; GFX12-NEXT:    s_load_b64 s[10:11], s[0:1], 0x8
+; GFX12-NEXT:    s_load_b32 s16, s[0:1], 0x10
+; GFX12-NEXT:    s_load_b64 s[6:7], s[0:1], 0x20
+; GFX12-NEXT:    s_load_b64 s[2:3], s[0:1], 0x28
+; GFX12-NEXT:    s_load_b32 s14, s[0:1], 0x30
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr1
+; GFX12-NEXT:    ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
+; GFX12-NEXT:    s_mov_b32 s17, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s1, 32
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_lshl_b64 s[12:13], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[8:9], s[4:5], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s9
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
+; GFX12-NEXT:    s_lshr_b64 s[10:11], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[16:17], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s11
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s10
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX12-NEXT:    s_mov_b32 s9, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s10, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s11, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s5, s4
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr4
+; GFX12-NEXT:    ; kill: def $sgpr14 killed $sgpr14 def $sgpr14_sgpr15
+; GFX12-NEXT:    s_mov_b32 s15, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[12:13], s[6:7], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[16:17], s[12:13], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s17
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s16
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[2:3], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[14:15], s[14:15], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[2:3], s[2:3], s[14:15]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s4, s3
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX12-NEXT:    s_mov_b32 s1, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s2, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s3, s4
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s4, s6
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    v_mov_b32_e32 v0, s5
+; GFX12-NEXT:    buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_LU
+; GFX12-NEXT:    v_mov_b32_e32 v1, s4
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_store_b32 v0, v1, s[0:3], null offen
+; GFX12-NEXT:    s_endpgm
+entry:
+  %val = load i32, ptr addrspace(7) %in, !amdgpu.last.use !{}
+  store i32 %val, ptr addrspace(7) %out
+  ret void
+}
+
+define amdgpu_kernel void @flat_last_use_load_1(ptr addrspace(7) %in, ptr addrspace(7) %out) {
+; GFX12-LABEL: flat_last_use_load_1:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_load_b64 s[6:7], s[0:1], 0x0
+; GFX12-NEXT:    s_load_b64 s[10:11], s[0:1], 0x8
+; GFX12-NEXT:    s_load_b32 s16, s[0:1], 0x10
+; GFX12-NEXT:    s_load_b64 s[4:5], s[0:1], 0x20
+; GFX12-NEXT:    s_load_b64 s[2:3], s[0:1], 0x28
+; GFX12-NEXT:    s_load_b32 s14, s[0:1], 0x30
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr1
+; GFX12-NEXT:    ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
+; GFX12-NEXT:    s_mov_b32 s17, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s1, 32
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_lshl_b64 s[12:13], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[8:9], s[6:7], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s9
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
+; GFX12-NEXT:    s_lshr_b64 s[10:11], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[16:17], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s11
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s10
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX12-NEXT:    s_mov_b32 s9, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s10, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s11, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr7
+; GFX12-NEXT:    ; kill: def $sgpr14 killed $sgpr14 def $sgpr14_sgpr15
+; GFX12-NEXT:    s_mov_b32 s15, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[12:13], s[4:5], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[16:17], s[12:13], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s17
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s16
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[2:3], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[14:15], s[14:15], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[2:3], s[2:3], s[14:15]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s7, s3
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX12-NEXT:    s_mov_b32 s1, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s2, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s3, s7
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
+; GFX12-NEXT:    s_mov_b32 s5, 0x3ff
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    v_and_b32_e64 v0, v0, s5
+; GFX12-NEXT:    s_mov_b32 s5, 2
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    v_lshl_add_u32 v0, v0, s5, s6
+; GFX12-NEXT:    buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_LU
+; GFX12-NEXT:    v_mov_b32_e32 v1, s4
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_store_b32 v0, v1, s[0:3], null offen
+; GFX12-NEXT:    s_endpgm
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %val.gep = getelementptr inbounds i32, ptr addrspace(7) %in, i32 %tid
+  %val = load i32, ptr addrspace(7) %val.gep, align 4, !amdgpu.last.use !{}
+  store i32 %val, ptr addrspace(7) %out
+  ret void
+}
+
+define amdgpu_kernel void @buffer_last_use_and_volatile_load(ptr addrspace(7) %in, ptr addrspace(7) %out) {
+; GFX12-LABEL: buffer_last_use_and_volatile_load:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
+; GFX12-NEXT:    s_load_b64 s[10:11], s[0:1], 0x8
+; GFX12-NEXT:    s_load_b32 s16, s[0:1], 0x10
+; GFX12-NEXT:    s_load_b64 s[6:7], s[0:1], 0x20
+; GFX12-NEXT:    s_load_b64 s[2:3], s[0:1], 0x28
+; GFX12-NEXT:    s_load_b32 s14, s[0:1], 0x30
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr1
+; GFX12-NEXT:    ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
+; GFX12-NEXT:    s_mov_b32 s17, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s1, 32
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_lshl_b64 s[12:13], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[8:9], s[4:5], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s9
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
+; GFX12-NEXT:    s_lshr_b64 s[10:11], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[16:17], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s11
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s10
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX12-NEXT:    s_mov_b32 s9, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s10, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s11, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s5, s4
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr4
+; GFX12-NEXT:    ; kill: def $sgpr14 killed $sgpr14 def $sgpr14_sgpr15
+; GFX12-NEXT:    s_mov_b32 s15, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[12:13], s[6:7], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[16:17], s[12:13], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s17
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s16
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[2:3], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[14:15], s[14:15], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[2:3], s[2:3], s[14:15]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s4, s3
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s2
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
+; GFX12-NEXT:    s_mov_b32 s1, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s2, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s3, s4
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s4, s6
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    v_mov_b32_e32 v0, s5
+; GFX12-NEXT:    buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT:    v_mov_b32_e32 v1, s4
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_store_b32 v0, v1, s[0:3], null offen
+; GFX12-NEXT:    s_endpgm
+entry:
+  %val = load volatile i32, ptr addrspace(7) %in, !amdgpu.last.use !{}
+  store i32 %val, ptr addrspace(7) %out
+  ret void
+}
+
+define amdgpu_kernel void @buffer_last_use_and_nontemporal_load(ptr addrspace(7) %in, ptr addrspace(7) %out) {
+; GFX12-LABEL: buffer_last_use_and_nontemporal_load:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
+; GFX12-NEXT:    s_load_b64 s[10:11], s[0:1], 0x8
+; GFX12-NEXT:    s_load_b32 s16, s[0:1], 0x10
+; GFX12-NEXT:    s_load_b64 s[6:7], s[0:1], 0x20
+; GFX12-NEXT:    s_load_b64 s[2:3], s[0:1], 0x28
+; GFX12-NEXT:    s_load_b32 s14, s[0:1], 0x30
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr1
+; GFX12-NEXT:    ; kill: def $sgpr16 killed $sgpr16 def $sgpr16_sgpr17
+; GFX12-NEXT:    s_mov_b32 s17, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s1, 32
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_lshl_b64 s[12:13], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[8:9], s[4:5], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[8:9], s[8:9], s[12:13]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s9
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
+; GFX12-NEXT:    s_lshr_b64 s[10:11], s[10:11], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[16:17], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[10:11], s[10:11], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s11
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s12, s10
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
+; GFX12-NEXT:    s_mov_b32 s9, s13
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s10, s12
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s11, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s5, s4
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    ; implicit-def: $sgpr0
+; GFX12-NEXT:    ; implicit-def: $sgpr4
+; GFX12-NEXT:    ; kill: def $sgpr14 killed $sgpr14 def $sgpr14_sgpr15
+; GFX12-NEXT:    s_mov_b32 s15, s0
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[16:17], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[12:13], s[6:7], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[16:17], s[12:13], s[16:17]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s13, s17
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s0, s16
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshr_b64 s[2:3], s[2:3], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_lshl_b64 s[14:15], s[14:15], s1
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_or_b64 s[2:3], s[2:3], s[14:15]
+; GFX12-NEXT:    s_wait_alu 0xfffe
+; GFX12-NEXT:    s_mov_b32 s4, s3
+; GFX12-NEXT:    s_wai...
[truncated]

@@ -1088,6 +1088,10 @@ Value *SplitPtrStructs::handleMemoryInst(Instruction *I, Value *Arg, Value *Ptr,
Aux |= (Aux & AMDGPU::CPol::GLC ? AMDGPU::CPol::DLC : 0);
if (IsVolatile)
Aux |= AMDGPU::CPol::VOLATILE;
if (I->hasMetadata("amdgpu.last.use"))
Aux |= AMDGPU::CPol::TH_LU;
if (I->hasMetadata("nontemporal"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the enum MD_nontemporal form

@@ -0,0 +1,383 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefix=GFX12 %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why -O0? Can you merge this with the existing buffer lowering test?

if (I->hasMetadata("amdgpu.last.use"))
Aux |= AMDGPU::CPol::TH_LU;
if (I->hasMetadata("nontemporal"))
Aux |= AMDGPU::CPol::TH_NT;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should only use the TH_* constants on GFX12, right?

@Acim-Maravic
Copy link
Contributor Author

Acim-Maravic commented Dec 27, 2024

I have modified the code so that the metadata is now handled in the Selection DAG, so there is no need for my previous changes that were in the amdgpu-lower-buffer-fat-pointers.

In this pass, the metadata was already being copied onto the intrinsic; the issue was that it wasn't being handled afterward.

Comment on lines 1243 to 1247

if (CI.hasMetadata(LLVMContext::MD_nontemporal))
Info.flags |= MachineMemOperand::MONonTemporal;
Info.flags |= getTargetMMOFlags(CI);

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this up to the top of the function? It seems like it could apply to all intrinsics, not just buffer intrinsics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes... I moved it up... There was already some metadata handling...

Comment on lines -1077 to -1079
bool IsInvariant =
(isa<LoadInst>(I) && I->getMetadata(LLVMContext::MD_invariant_load));
bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing IsInvariant and IsNonTemporal makes sense to me, since we preserve this info when copying the metadata to the intrinsic.

But I'm not sure if it is safe to remove the glc/dlc logic below.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That GLC/DLC is what's used to implement nontemporal, no? It was in the lowering logic up in LLPC when I copied from it.

If we're removing adding that data here, I'd like pointers in code - and tests for, if we don't have them - to setting glc/dlc in both SelectionDAG and GlobalISel's handling of the intrinsics

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IsNonTemporal should be handled post-isel by SIMemoryLegalizer, which sets bits in the aux (aka cpol) operand based on flags in the MachineMemOperand. It also knows how to do it properly for different architectures, which this code does not - e.g. the meaning of the aux bits changes completely in GFX12.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before GFX12, nontemporal was marked as GLC+SLC. This pass was copying the metadata to an intrinsic, and that metadata is handled by SIMemoryLegalizer; the only problem was that during instruction selection the metadata was not copied (it was lost), so SIMemoryLegalizer was not able to handle it afterwards.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That GLC/DLC is what's used to implement nontemporal, no? It was in the lowering logic up in LLPC when I copied from it.

If we're removing adding that data here, I'd like pointers in code - and tests for, if we don't have them - to setting glc/dlc in both SelectionDAG and GlobalISel's handling of the intrinsics

I have added tests for Metadata in lower-fat-buffer-pointers - tests have been added for both SelectionDAG and GlobalISel, for all targets where the Cache policy differs.

Copy link
Contributor

@krzysz00 krzysz00 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also like to flag #110720

Comment on lines -1077 to -1079
bool IsInvariant =
(isa<LoadInst>(I) && I->getMetadata(LLVMContext::MD_invariant_load));
bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That GLC/DLC is what's used to implement nontemporal, no? It was in the lowering logic up in LLPC when I copied from it.

If we're removing adding that data here, I'd like pointers in code - and tests for, if we don't have them - to setting glc/dlc in both SelectionDAG and GlobalISel's handling of the intrinsics

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefix=GFX12 %s


define amdgpu_kernel void @buffer_last_use_load_0(ptr addrspace(7) %in, ptr addrspace(7) %out) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, let's add this to an existing test, potentially, and also run it for gfx11 and a gfx9, at least

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LastUse was not used before GFX12

Copy link
Contributor

@krzysz00 krzysz00 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Approved, thank you

@Acim-Maravic Acim-Maravic merged commit cc3aab5 into llvm:main Jan 14, 2025
8 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jan 14, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-aarch64-darwin running on doug-worker-5 while building llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/12784

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'Clang :: Driver/arm-cortex-cpus-1.c' FAILED ********************
Exit Code: 134

Command Output (stderr):
--
RUN: at line 7: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-GENERIC /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-GENERIC /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 10: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armeb -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-BE-GENERIC /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-BE-GENERIC /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armeb -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 13: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -mthumb -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-GENERIC-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -mthumb -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-GENERIC-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 16: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armeb -mthumb -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-BE-GENERIC-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armeb -mthumb -mcpu=generic -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-BE-GENERIC-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 19: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv4t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv4t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 20: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv4t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv4t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 23: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv4t -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv4t -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 24: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -mthumb -march=armv4t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -mthumb -march=armv4t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V4T-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 27: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5 -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5 -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 28: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv5 -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv5 -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 29: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 30: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv5t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv5t -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5 /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 33: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5 -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5 -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 34: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv5 -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target arm -march=armv5 -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
RUN: at line 35: /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5t -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c 2>&1 | /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/clang -target armv5t -mthumb -### -c /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
+ /Users/buildbot/buildbot-root/aarch64-darwin/build/bin/FileCheck -check-prefix=CHECK-V5-THUMB /Users/buildbot/buildbot-root/aarch64-darwin/llvm-project/clang/test/Driver/arm-cortex-cpus-1.c
...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants