Skip to content

Commit 259d50b

Browse files
committed
AMDGPU: Make frame index folding logic consistent with eliminateFrameIndex
This adds handling of s_add_u32, which eliminateFrameIndex handles, and removes handling of s_or_b32 and s_and_b32, which it does not. I was working on handling them in #102345, but still need to finish that patch. This fixes a regression exposed by a316539 where the final instruction would use two literals.
1 parent d70a17f commit 259d50b

File tree

6 files changed

+271
-61
lines changed

6 files changed

+271
-61
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,7 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
227227
const unsigned Opc = UseMI.getOpcode();
228228
switch (Opc) {
229229
case AMDGPU::S_ADD_I32:
230-
case AMDGPU::S_OR_B32:
231-
case AMDGPU::S_AND_B32:
230+
case AMDGPU::S_ADD_U32:
232231
case AMDGPU::V_ADD_U32_e32:
233232
case AMDGPU::V_ADD_CO_U32_e32:
234233
// TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,3 +393,149 @@ body: |
393393
SI_RETURN implicit $vgpr0, implicit $vgpr1
394394
395395
...
396+
397+
---
398+
name: fold_frame_index__s_add_u32__fi_const
399+
tracksRegLiveness: true
400+
frameInfo:
401+
maxAlignment: 4
402+
localFrameSize: 16384
403+
stack:
404+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
405+
body: |
406+
bb.0:
407+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__fi_const
408+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 %stack.0, 128, implicit-def $scc
409+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
410+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
411+
%0:sreg_32 = S_MOV_B32 %stack.0
412+
%1:sreg_32 = S_ADD_U32 %0, 128, implicit-def $scc
413+
$sgpr4 = COPY %1
414+
SI_RETURN implicit $sgpr4
415+
...
416+
417+
---
418+
name: fold_frame_index__s_add_u32__const_fi
419+
tracksRegLiveness: true
420+
frameInfo:
421+
maxAlignment: 4
422+
localFrameSize: 16384
423+
stack:
424+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
425+
body: |
426+
bb.0:
427+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__const_fi
428+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 128, %stack.0, implicit-def $scc
429+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
430+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
431+
%0:sreg_32 = S_MOV_B32 %stack.0
432+
%1:sreg_32 = S_ADD_U32 128, %0, implicit-def $scc
433+
$sgpr4 = COPY %1
434+
SI_RETURN implicit $sgpr4
435+
...
436+
437+
---
438+
name: fold_frame_index__s_add_u32__fi_inlineimm
439+
tracksRegLiveness: true
440+
frameInfo:
441+
maxAlignment: 4
442+
localFrameSize: 16384
443+
stack:
444+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
445+
body: |
446+
bb.0:
447+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__fi_inlineimm
448+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 %stack.0, 16, implicit-def $scc
449+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
450+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
451+
%0:sreg_32 = S_MOV_B32 %stack.0
452+
%1:sreg_32 = S_ADD_U32 %0, 16, implicit-def $scc
453+
$sgpr4 = COPY %1
454+
SI_RETURN implicit $sgpr4
455+
...
456+
457+
---
458+
name: fold_frame_index__s_add_u32__inlineimm_fi
459+
tracksRegLiveness: true
460+
frameInfo:
461+
maxAlignment: 4
462+
localFrameSize: 16384
463+
stack:
464+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
465+
body: |
466+
bb.0:
467+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__inlineimm_fi
468+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 16, %stack.0, implicit-def $scc
469+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
470+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
471+
%0:sreg_32 = S_MOV_B32 %stack.0
472+
%1:sreg_32 = S_ADD_U32 16, %0, implicit-def $scc
473+
$sgpr4 = COPY %1
474+
SI_RETURN implicit $sgpr4
475+
...
476+
477+
---
478+
name: no_fold_literal_and_fi_s_or_b32
479+
tracksRegLiveness: true
480+
frameInfo:
481+
maxAlignment: 16
482+
localFrameSize: 8192
483+
stack:
484+
- { id: 0, size: 4096, alignment: 4, local-offset: 0 }
485+
- { id: 1, size: 4096, alignment: 16, local-offset: 4096 }
486+
body: |
487+
bb.0:
488+
; CHECK-LABEL: name: no_fold_literal_and_fi_s_or_b32
489+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
490+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_MOV_B32_]], 12345, implicit-def dead $scc
491+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
492+
%0:sreg_32 = S_MOV_B32 12345
493+
%1:sreg_32 = S_MOV_B32 %stack.1
494+
%2:sreg_32 = S_AND_B32 killed %1, killed %0, implicit-def dead $scc
495+
S_ENDPGM 0, implicit %2
496+
497+
...
498+
499+
---
500+
name: no_fold_literal_or_fi_s_or_b32
501+
tracksRegLiveness: true
502+
frameInfo:
503+
maxAlignment: 16
504+
localFrameSize: 8192
505+
stack:
506+
- { id: 0, size: 4096, alignment: 4, local-offset: 0 }
507+
- { id: 1, size: 4096, alignment: 16, local-offset: 4096 }
508+
body: |
509+
bb.0:
510+
; CHECK-LABEL: name: no_fold_literal_or_fi_s_or_b32
511+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
512+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_MOV_B32_]], 12345, implicit-def dead $scc
513+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
514+
%0:sreg_32 = S_MOV_B32 12345
515+
%1:sreg_32 = S_MOV_B32 %stack.1
516+
%2:sreg_32 = S_OR_B32 killed %1, killed %0, implicit-def dead $scc
517+
S_ENDPGM 0, implicit %2
518+
519+
...
520+
521+
---
522+
name: no_fold_literal_and_fi_s_mul_i32
523+
tracksRegLiveness: true
524+
frameInfo:
525+
maxAlignment: 16
526+
localFrameSize: 8192
527+
stack:
528+
- { id: 0, size: 4096, alignment: 4, local-offset: 0 }
529+
- { id: 1, size: 4096, alignment: 16, local-offset: 4096 }
530+
body: |
531+
bb.0:
532+
; CHECK-LABEL: name: no_fold_literal_and_fi_s_mul_i32
533+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
534+
; CHECK-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 killed [[S_MOV_B32_]], 12345, implicit-def dead $scc
535+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_MUL_I32_]]
536+
%0:sreg_32 = S_MOV_B32 12345
537+
%1:sreg_32 = S_MOV_B32 %stack.1
538+
%2:sreg_32 = S_MUL_I32 killed %1, killed %0, implicit-def dead $scc
539+
S_ENDPGM 0, implicit %2
540+
541+
...

llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,10 @@ stack:
394394
body: |
395395
bb.0:
396396
; CHECK-LABEL: name: fold_s_or_b32__mov_fi_const_copy_to_virt_vgpr
397-
; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 128, %stack.0, implicit $exec
398-
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e32_]]
397+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
398+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_MOV_B32_]], 128, implicit-def dead $scc
399+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
400+
; CHECK-NEXT: SI_RETURN implicit [[COPY]]
399401
%0:sreg_32 = S_MOV_B32 %stack.0
400402
%1:sreg_32 = S_OR_B32 %0, 128, implicit-def dead $scc
401403
%2:vgpr_32 = COPY %1
@@ -410,8 +412,10 @@ stack:
410412
body: |
411413
bb.0:
412414
; CHECK-LABEL: name: fold_s_or_b32__const_copy_mov_fi_to_virt_vgpr
413-
; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 128, %stack.0, implicit $exec
414-
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e32_]]
415+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
416+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 128, [[S_MOV_B32_]], implicit-def dead $scc
417+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
418+
; CHECK-NEXT: SI_RETURN implicit [[COPY]]
415419
%0:sreg_32 = S_MOV_B32 %stack.0
416420
%1:sreg_32 = S_OR_B32 128, %0, implicit-def dead $scc
417421
%2:vgpr_32 = COPY %1
@@ -426,8 +430,8 @@ stack:
426430
body: |
427431
bb.0:
428432
; CHECK-LABEL: name: fold_s_or_b32__fi_imm_copy_to_virt_vgpr
429-
; CHECK: %1:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
430-
; CHECK-NEXT: SI_RETURN implicit %1
433+
; CHECK: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
434+
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e64_]]
431435
%0:sreg_32 = disjoint S_OR_B32 %stack.0, 64, implicit-def dead $scc
432436
%1:vgpr_32 = COPY %0
433437
SI_RETURN implicit %1
@@ -441,8 +445,8 @@ stack:
441445
body: |
442446
bb.0:
443447
; CHECK-LABEL: name: fold_s_or_b32__imm_fi_copy_to_virt_vgpr
444-
; CHECK: %1:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
445-
; CHECK-NEXT: SI_RETURN implicit %1
448+
; CHECK: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
449+
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e64_]]
446450
%0:sreg_32 = disjoint S_OR_B32 64, %stack.0, implicit-def dead $scc
447451
%1:vgpr_32 = COPY %0
448452
SI_RETURN implicit %1
@@ -521,8 +525,10 @@ stack:
521525
body: |
522526
bb.0:
523527
; CHECK-LABEL: name: fold_s_and_b32__mov_fi_const_copy_to_virt_vgpr
524-
; CHECK: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 128, %stack.0, implicit $exec
525-
; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e32_]]
528+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
529+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 128, implicit-def dead $scc
530+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]]
531+
; CHECK-NEXT: SI_RETURN implicit [[COPY]]
526532
%0:sreg_32 = S_MOV_B32 %stack.0
527533
%1:sreg_32 = S_AND_B32 %0, 128, implicit-def dead $scc
528534
%2:vgpr_32 = COPY %1

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,4 +374,46 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
374374
ret void
375375
}
376376

377+
; GCN-LABEL: {{^}}fi_sop2_and_literal_error:
378+
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1fe00
379+
define amdgpu_kernel void @fi_sop2_and_literal_error() #0 {
380+
entry:
381+
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
382+
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
383+
%p2i = ptrtoint ptr addrspace(5) %Total3.i.i to i32
384+
br label %.shuffle.then.i.i.i.i
385+
386+
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
387+
store i64 0, ptr addrspace(5) null, align 4
388+
%or = and i32 %p2i, -512
389+
%icmp = icmp ugt i32 %or, 9999999
390+
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
391+
392+
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
393+
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
394+
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
395+
ret void
396+
}
397+
398+
; GCN-LABEL: {{^}}fi_sop2_or_literal_error:
399+
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3039
400+
define amdgpu_kernel void @fi_sop2_or_literal_error() #0 {
401+
entry:
402+
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
403+
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
404+
%p2i = ptrtoint ptr addrspace(5) %Total3.i.i to i32
405+
br label %.shuffle.then.i.i.i.i
406+
407+
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
408+
store i64 0, ptr addrspace(5) null, align 4
409+
%or = or i32 %p2i, 12345
410+
%icmp = icmp ugt i32 %or, 9999999
411+
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
412+
413+
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
414+
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
415+
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
416+
ret void
417+
}
418+
377419
attributes #0 = { nounwind }

llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH2048K %s
88

99
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
10-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
11-
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xfffc, [[FI]]
12-
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
10+
; GCN: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
11+
; GCN: s_and_b32 s{{[0-9]+}}, [[FI]], 0xfffc
12+
; GCN: v_mov_b32_e32 [[VFI:v[0-9]+]], [[FI]]{{$}}
13+
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], [[VFI]]
1314
define amdgpu_kernel void @scratch_buffer_known_high_masklo16() {
1415
%alloca = alloca i32, align 4, addrspace(5)
1516
store volatile i32 15, ptr addrspace(5) %alloca
@@ -20,11 +21,15 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo16() {
2021
}
2122

2223
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
23-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
24-
; SCRATCH128K-NOT: v_and_b32
25-
; SCRATCH256K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]]
26-
; SCRATCH1024K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]]
27-
; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]]
24+
; SCRATCH256K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
25+
; SCRATCH256K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x1fffc
26+
27+
; SCRATCH1024K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
28+
; SCRATCH1024K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x1fffc
29+
30+
; SCRATCH2048K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
31+
; SCRATCH2048K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x1fffc
32+
2833
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
2934
define amdgpu_kernel void @scratch_buffer_known_high_masklo17() {
3035
%alloca = alloca i32, align 4, addrspace(5)
@@ -36,11 +41,17 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo17() {
3641
}
3742

3843
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo18:
39-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
40-
; SCRATCH128K-NOT: v_and_b32
41-
; SCRATCH256K-NOT: v_and_b32
42-
; SCRATCH1024K: v_and_b32_e32 v{{[0-9]+}}, 0x3fffc, [[FI]]
43-
; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0x3fffc, [[FI]]
44+
; SCRATCH128K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
45+
; SCRATCH256K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
46+
; SCRATCH128K-NOT: and_b32
47+
; SCRATCH256K-NOT: and_b32
48+
49+
; SCRATCH1024K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
50+
; SCRATCH1024K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x3fffc
51+
52+
; SCRATCH2048K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
53+
; SCRATCH2048K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x3fffc
54+
4455
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
4556
define amdgpu_kernel void @scratch_buffer_known_high_masklo18() {
4657
%alloca = alloca i32, align 4, addrspace(5)
@@ -52,11 +63,16 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo18() {
5263
}
5364

5465
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo20:
55-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
56-
; SCRATCH128K-NOT: v_and_b32
57-
; SCRATCH256K-NOT: v_and_b32
58-
; SCRATCH1024K-NOT: v_and_b32
59-
; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0xffffc, [[FI]]
66+
; SCRATCH128K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
67+
; SCRATCH256K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
68+
; SCRATCH1024K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
69+
70+
; SCRATCH128K-NOT: and_b32
71+
; SCRATCH256K-NOT: and_b32
72+
; SCRATCH1024K-NOT: and_b32
73+
74+
; SCRATCH2048K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
75+
; SCRATCH2048K: s_and_b32 s{{[0-9]+}}, [[FI]], 0xffffc
6076
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
6177
define amdgpu_kernel void @scratch_buffer_known_high_masklo20() {
6278
%alloca = alloca i32, align 4, addrspace(5)
@@ -69,7 +85,7 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo20() {
6985

7086
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo21:
7187
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
72-
; GCN-NOT: v_and_b32
88+
; GCN-NOT: and_b32
7389
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
7490
define amdgpu_kernel void @scratch_buffer_known_high_masklo21() {
7591
%alloca = alloca i32, align 4, addrspace(5)

0 commit comments

Comments
 (0)