[AMDGPU] Auto-generating lit test patterns (NFC) #93837

cdevadas · 2024-05-30T15:32:36Z

The test CodeGen/AMDGPU/build_vector.ll has its lit check patterns partially hand-written and partially auto-generated. This mix is awkward to maintain when future patches require changes to the test, so this patch auto-generates all of the check patterns.

llvmbot · 2024-05-30T15:33:10Z

@llvm/pr-subscribers-backend-amdgpu

Author: Christudasan Devadasan (cdevadas)

Changes

The test CodeGen/AMDGPU/build_vector.ll has its lit check patterns partially hand-written and partially auto-generated. This mix is awkward to maintain when future patches require changes to the test, so this patch auto-generates all of the check patterns.

Full diff: https://github.com/llvm/llvm-project/pull/93837.diff

1 Files Affected:

(modified) llvm/test/CodeGen/AMDGPU/build_vector.ll (+303-87)

diff --git a/llvm/test/CodeGen/AMDGPU/build_vector.ll b/llvm/test/CodeGen/AMDGPU/build_vector.ll
index 99755133f36d6..f23cd0d345104 100644
--- a/llvm/test/CodeGen/AMDGPU/build_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/build_vector.ll
@@ -1,95 +1,311 @@
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=R600,ALL
-; RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=GFX6,GFX678,ALL
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8,GFX678,ALL
-; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,GFX1011,ALL
-; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX1011,ALL
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,ALL
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=R600
+; RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940
 
-; ALL-LABEL: {{^}}build_vector2:
-; R600: MOV
-; R600: MOV
-; R600-NOT: MOV
-; GFX678-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
-; GFX678-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; GFX1011-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
-; GFX1011-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; GFX678: buffer_store_dwordx2 v[[[X]]:[[Y]]]
-; GFX10: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX11: global_store_b64 v2, v[0:1], s[0:1]
 define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
+; R600-LABEL: build_vector2:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 4, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.Y, literal.x,
+; R600-NEXT:    6(8.407791e-45), 0(0.000000e+00)
+; R600-NEXT:     MOV T0.X, literal.x,
+; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; R600-NEXT:    5(7.006492e-45), 2(2.802597e-45)
+;
+; GCN-LABEL: build_vector2:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, 5
+; GCN-NEXT:    v_mov_b32_e32 v1, 6
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: build_vector2:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    v_mov_b32_e32 v0, 5
+; GFX8-NEXT:    v_mov_b32_e32 v1, 6
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: build_vector2:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 5
+; GFX10-NEXT:    v_mov_b32_e32 v1, 6
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: build_vector2:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v2, 0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 5
+; GFX11-NEXT:    v_mov_b32_e32 v1, 6
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX940-LABEL: build_vector2:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v2, 0
+; GFX940-NEXT:    v_mov_b32_e32 v0, 5
+; GFX940-NEXT:    v_mov_b32_e32 v1, 6
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
+; GFX940-NEXT:    s_endpgm
 entry:
   store <2 x i32> <i32 5, i32 6>, ptr addrspace(1) %out
   ret void
 }
 
-; ALL-LABEL: {{^}}build_vector4:
-; R600: MOV
-; R600: MOV
-; R600: MOV
-; R600: MOV
-; R600-NOT: MOV
-; GFX678-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
-; GFX678-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; GFX678-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
-; GFX678-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
-; GFX1011-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
-; GFX1011-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
-; GFX1011-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
-; GFX1011-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
-; GFX678: buffer_store_dwordx4 v[[[X]]:[[W]]]
-; GFX10: global_store_dwordx4 v4, v[0:3], s[0:1]
-; GFX11: global_store_b128 v4, v[0:3], s[0:1]
 define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
+; R600-LABEL: build_vector4:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 8, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, literal.x,
+; R600-NEXT:    8(1.121039e-44), 0(0.000000e+00)
+; R600-NEXT:     MOV * T0.Z, literal.x,
+; R600-NEXT:    7(9.809089e-45), 0(0.000000e+00)
+; R600-NEXT:     MOV * T0.Y, literal.x,
+; R600-NEXT:    6(8.407791e-45), 0(0.000000e+00)
+; R600-NEXT:     MOV T0.X, literal.x,
+; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
+; R600-NEXT:    5(7.006492e-45), 2(2.802597e-45)
+;
+; GCN-LABEL: build_vector4:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, 5
+; GCN-NEXT:    v_mov_b32_e32 v1, 6
+; GCN-NEXT:    v_mov_b32_e32 v2, 7
+; GCN-NEXT:    v_mov_b32_e32 v3, 8
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: build_vector4:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    v_mov_b32_e32 v0, 5
+; GFX8-NEXT:    v_mov_b32_e32 v1, 6
+; GFX8-NEXT:    v_mov_b32_e32 v2, 7
+; GFX8-NEXT:    v_mov_b32_e32 v3, 8
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: build_vector4:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 5
+; GFX10-NEXT:    v_mov_b32_e32 v1, 6
+; GFX10-NEXT:    v_mov_b32_e32 v2, 7
+; GFX10-NEXT:    v_mov_b32_e32 v3, 8
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: build_vector4:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 5
+; GFX11-NEXT:    v_mov_b32_e32 v1, 6
+; GFX11-NEXT:    v_mov_b32_e32 v2, 7
+; GFX11-NEXT:    v_mov_b32_e32 v3, 8
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX940-LABEL: build_vector4:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v4, 0
+; GFX940-NEXT:    v_mov_b32_e32 v0, 5
+; GFX940-NEXT:    v_mov_b32_e32 v1, 6
+; GFX940-NEXT:    v_mov_b32_e32 v2, 7
+; GFX940-NEXT:    v_mov_b32_e32 v3, 8
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
+; GFX940-NEXT:    s_endpgm
 entry:
   store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr addrspace(1) %out
   ret void
 }
 
-
-; ALL-LABEL: {{^}}build_vector_v2i16:
-; R600: MOV
-; R600-NOT: MOV
-; GFX678: s_mov_b32 s3, 0xf000
-; GFX678: s_mov_b32 s2, -1
-; GFX678: v_mov_b32_e32 v0, 0x60005
-; GFX678: s_waitcnt lgkmcnt(0)
-; GFX678: buffer_store_dword v0, off, s[0:3], 0
-; GFX1011: v_mov_b32_e32 v0, 0
-; GFX1011: v_mov_b32_e32 v1, 0x60005
-; GFX1011: s_waitcnt lgkmcnt(0)
-; GFX10: global_store_dword v0, v1, s[0:1]
-; GFX11: global_store_b32 v0, v1, s[0:1]
 define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
+; R600-LABEL: build_vector_v2i16:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV T4.X, literal.x,
+; R600-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
+; R600-NEXT:    393221(5.510200e-40), 2(2.802597e-45)
+;
+; GCN-LABEL: build_vector_v2i16:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    v_mov_b32_e32 v0, 0x60005
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: build_vector_v2i16:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    v_mov_b32_e32 v0, 0x60005
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: build_vector_v2i16:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0x60005
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: build_vector_v2i16:
+; GFX11:       ; %bb.0: ; %entry
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    v_mov_b32_e32 v1, 0x60005
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX940-LABEL: build_vector_v2i16:
+; GFX940:       ; %bb.0: ; %entry
+; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0
+; GFX940-NEXT:    v_mov_b32_e32 v1, 0x60005
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
+; GFX940-NEXT:    s_endpgm
 entry:
   store <2 x i16> <i16 5, i16 6>, ptr addrspace(1) %out
   ret void
 }
 
-; ALL-LABEL: {{^}}build_vector_v2i16_trunc:
-; R600: LSHR
-; R600: OR_INT
-; R600: LSHR
-; R600-NOT: MOV
-; GFX6: s_mov_b32 s3, 0xf000
-; GFX6: s_waitcnt lgkmcnt(0)
-; GFX6: v_alignbit_b32 v0, 5, s4, 16
-; GFX6: buffer_store_dword v0, off, s[0:3], 0
-; GFX8: s_mov_b32 s3, 0xf000
-; GFX8: s_mov_b32 s2, -1
-; GFX8: s_waitcnt lgkmcnt(0)
-; GFX8: s_lshr_b32 s4, s4, 16
-; GFX8: s_or_b32 s4, s4, 0x50000
-; GFX8: v_mov_b32_e32 v0, s4
-; GFX8: buffer_store_dword v0, off, s[0:3], 0
-; GFX1011: v_mov_b32_e32 v0, 0
-; GFX1011: s_waitcnt lgkmcnt(0)
-; GFX10: s_lshr_b32 s2, s2, 16
-; GFX10: s_pack_ll_b32_b16 s2, s2, 5
-; GFX11: s_pack_hl_b32_b16 s2, s2, 5
-; GFX1011: v_mov_b32_e32 v1, s2
-; GFX10: global_store_dword v0, v1, s[0:1]
-; GFX11: global_store_b32 v0, v1, s[0:1]
 define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 %a) {
+; R600-LABEL: build_vector_v2i16_trunc:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 4, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T4.X, T5.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     LSHR * T0.W, KC0[2].Z, literal.x,
+; R600-NEXT:    16(2.242078e-44), 0(0.000000e+00)
+; R600-NEXT:     OR_INT T4.X, PV.W, literal.x,
+; R600-NEXT:     LSHR * T5.X, KC0[2].Y, literal.y,
+; R600-NEXT:    327680(4.591775e-40), 2(2.802597e-45)
+;
+; GCN-LABEL: build_vector_v2i16_trunc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_load_dword s4, s[0:1], 0xb
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    v_alignbit_b32 v0, 5, s4, 16
+; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT:    s_endpgm
+;
+; GFX8-LABEL: build_vector_v2i16_trunc:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_mov_b32 s3, 0xf000
+; GFX8-NEXT:    s_mov_b32 s2, -1
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_lshr_b32 s4, s4, 16
+; GFX8-NEXT:    s_or_b32 s4, s4, 0x50000
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: build_vector_v2i16_trunc:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x8
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX10-NEXT:    s_pack_ll_b32_b16 s2, s2, 5
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; GFX11-LABEL: build_vector_v2i16_trunc:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_clause 0x1
+; GFX11-NEXT:    s_load_b32 s2, s[0:1], 0x8
+; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
+; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX11-NEXT:    s_pack_hl_b32_b16 s2, s2, 5
+; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT:    v_mov_b32_e32 v1, s2
+; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT:    s_nop 0
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+;
+; GFX940-LABEL: build_vector_v2i16_trunc:
+; GFX940:       ; %bb.0:
+; GFX940-NEXT:    s_load_dword s4, s[0:1], 0x2c
+; GFX940-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX940-NEXT:    v_mov_b32_e32 v0, 0
+; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX940-NEXT:    s_lshr_b32 s0, s4, 16
+; GFX940-NEXT:    s_pack_ll_b32_b16 s0, s0, 5
+; GFX940-NEXT:    v_mov_b32_e32 v1, s0
+; GFX940-NEXT:    global_store_dword v0, v1, s[2:3] sc0 sc1
+; GFX940-NEXT:    s_endpgm
   %srl = lshr i32 %a, 16
   %trunc = trunc i32 %srl to i16
   %ins.0 = insertelement <2 x i16> undef, i16 %trunc, i32 0
@@ -98,6 +314,7 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
   ret void
 }
 
+define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) {
 ; R600-LABEL: build_v2i32_from_v4i16_shuffle:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 0, @10, KC0[], KC1[]
@@ -118,20 +335,20 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
 ; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.y,
 ; R600-NEXT:    16(2.242078e-44), 2(2.802597e-45)
 ;
-; GFX6-LABEL: build_v2i32_from_v4i16_shuffle:
-; GFX6:       ; %bb.0: ; %entry
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
-; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    s_mov_b32 s4, s0
-; GFX6-NEXT:    s_mov_b32 s5, s1
-; GFX6-NEXT:    v_mov_b32_e32 v0, s2
-; GFX6-NEXT:    v_mov_b32_e32 v1, s3
-; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
-; GFX6-NEXT:    s_endpgm
+; GCN-LABEL: build_v2i32_from_v4i16_shuffle:
+; GCN:       ; %bb.0: ; %entry
+; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GCN-NEXT:    s_mov_b32 s7, 0xf000
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_lshl_b32 s3, s3, 16
+; GCN-NEXT:    s_lshl_b32 s2, s2, 16
+; GCN-NEXT:    s_mov_b32 s6, -1
+; GCN-NEXT:    s_mov_b32 s4, s0
+; GCN-NEXT:    s_mov_b32 s5, s1
+; GCN-NEXT:    v_mov_b32_e32 v0, s2
+; GCN-NEXT:    v_mov_b32_e32 v1, s3
+; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GCN-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: build_v2i32_from_v4i16_shuffle:
 ; GFX8:       ; %bb.0: ; %entry
@@ -185,7 +402,6 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
 ; GFX940-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
 ; GFX940-NEXT:    s_endpgm
-define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) {
 entry:
   %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2>
   %zextended = zext <2 x i16> %shuf to <2 x i32>

arsenm · 2024-05-30T18:33:13Z

llvm/test/CodeGen/AMDGPU/build_vector.ll

+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=R600
+; RUN: llc < %s -mtriple=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940


Test content could probably use some cleanup and regularization too. Might also want to split out r600?

If you are generating checks, there's no reason to keep -mattr=-flat-for-global

Should drop the -verify-machineinstrs. Should also try adding common prefixes?

Test content could probably use some cleanup and regularization too. Might also want to split out r600?

I don't know the original intention of the test well enough to do further cleanup. What do you mean by split out r600?

If you are generating checks, there's no reason to keep -mattr=-flat-for-global

Why?

Should drop the -verify-machineinstrs. Should also try adding common prefixes?
Why not -verify-machineinstrs?

Should also try adding common prefixes?
The common prefixes didn't make any difference. So I removed them.

Test content could probably use some cleanup and regularization too. Might also want to split out r600?

I don't know the original intention of the test well enough to do further cleanup. What do you mean by split out r600?

I mean have a separate r600 version of the test so we don't have mixed r600 and amdgcn run lines

If you are generating checks, there's no reason to keep -mattr=-flat-for-global

Why?

Because the main reason we have this anywhere was to make the gfx6/7 output closer to match gfx8 when handwriting checks

Should drop the -verify-machineinstrs. Should also try adding common prefixes?
Why not -verify-machineinstrs?

It's very expensive. It is on by default in EXPENSIVE_CHECKS builds, so we should only be using it in targeted tests for verifier errors and not just blindly add it to every test as we've usually done

arsenm · 2024-06-06T15:32:36Z

llvm/test/CodeGen/AMDGPU/build_vector.ll

+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=GFX8
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
+; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940


might as well drop the verifies

arsenm · 2024-06-06T15:32:46Z

llvm/test/CodeGen/AMDGPU/build_vector.ll

-; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX1011,ALL
-; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,ALL
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefixes=GCN


might as well switch this to using -mcpu=tahiti

[AMDGPU] Auto-generating lit test patterns (NFC)

40f41a0

The test CodeGen/AMDGPU/build_vector.ll has its lit check patterns partially hand-written and partially auto-generated. This mix is awkward to maintain when future patches require changes to the test, so this patch auto-generates all of the check patterns.

llvmbot added the backend:AMDGPU label May 30, 2024

cdevadas requested a review from arsenm May 30, 2024 15:32

arsenm reviewed May 30, 2024

View reviewed changes

cdevadas added 2 commits June 3, 2024 10:48

Added a separate test for R600.

c41b539

Removed flat-for-global attr and verify-machineinstrs.

ab226ba

cdevadas requested a review from jayfoad June 5, 2024 07:05

arsenm reviewed Jun 6, 2024

View reviewed changes

removed verifier and added mcpu in the runline.

fc8c2e4

arsenm approved these changes Jun 6, 2024

View reviewed changes

cdevadas merged commit 0e1d6e2 into llvm:main Jun 7, 2024
7 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Auto-generating lit test patterns (NFC) #93837

[AMDGPU] Auto-generating lit test patterns (NFC) #93837

Uh oh!

cdevadas commented May 30, 2024

Uh oh!

llvmbot commented May 30, 2024

Uh oh!

arsenm May 30, 2024

Uh oh!

cdevadas May 31, 2024

Uh oh!

cdevadas May 31, 2024

Uh oh!

arsenm May 31, 2024

Uh oh!

arsenm Jun 6, 2024

Uh oh!

arsenm Jun 6, 2024

Uh oh!

Uh oh!

Uh oh!

[AMDGPU] Auto-generating lit test patterns (NFC) #93837

[AMDGPU] Auto-generating lit test patterns (NFC) #93837

Uh oh!

Conversation

cdevadas commented May 30, 2024

Uh oh!

llvmbot commented May 30, 2024

Uh oh!

arsenm May 30, 2024

Choose a reason for hiding this comment

Uh oh!

cdevadas May 31, 2024

Choose a reason for hiding this comment

Uh oh!

cdevadas May 31, 2024

Choose a reason for hiding this comment

Uh oh!

arsenm May 31, 2024

Choose a reason for hiding this comment

Uh oh!

arsenm Jun 6, 2024

Choose a reason for hiding this comment

Uh oh!

arsenm Jun 6, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!