Skip to content

Commit 3e53aea

Browse files
authored
AMDGPU: Make frame index folding logic consistent with eliminateFrameIndex (#129633)
This adds handling of s_add_u32, which eliminateFrameIndex handles, and removes handling of s_or_b32 and s_and_b32, which it does not. I was working on handling them in #102345, but I still need to finish that patch. This fixes a regression exposed by a316539 where the final instruction would use two literals.
1 parent 68427bc commit 3e53aea

File tree

6 files changed

+271
-61
lines changed

6 files changed

+271
-61
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,7 @@ bool SIFoldOperandsImpl::frameIndexMayFold(
232232
const unsigned Opc = UseMI.getOpcode();
233233
switch (Opc) {
234234
case AMDGPU::S_ADD_I32:
235-
case AMDGPU::S_OR_B32:
236-
case AMDGPU::S_AND_B32:
235+
case AMDGPU::S_ADD_U32:
237236
case AMDGPU::V_ADD_U32_e32:
238237
case AMDGPU::V_ADD_CO_U32_e32:
239238
// TODO: Possibly relax hasOneUse. It matters more for mubuf, since we have

llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,4 +426,150 @@ body: |
426426
$sgpr4 = COPY %4
427427
$sgpr5 = COPY %5
428428
SI_RETURN implicit $sgpr4, implicit $sgpr5
429+
430+
...
431+
432+
name: fold_frame_index__s_add_u32__fi_const
433+
tracksRegLiveness: true
434+
frameInfo:
435+
maxAlignment: 4
436+
localFrameSize: 16384
437+
stack:
438+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
439+
body: |
440+
bb.0:
441+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__fi_const
442+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 %stack.0, 128, implicit-def $scc
443+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
444+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
445+
%0:sreg_32 = S_MOV_B32 %stack.0
446+
%1:sreg_32 = S_ADD_U32 %0, 128, implicit-def $scc
447+
$sgpr4 = COPY %1
448+
SI_RETURN implicit $sgpr4
449+
...
450+
451+
---
452+
name: fold_frame_index__s_add_u32__const_fi
453+
tracksRegLiveness: true
454+
frameInfo:
455+
maxAlignment: 4
456+
localFrameSize: 16384
457+
stack:
458+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
459+
body: |
460+
bb.0:
461+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__const_fi
462+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 128, %stack.0, implicit-def $scc
463+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
464+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
465+
%0:sreg_32 = S_MOV_B32 %stack.0
466+
%1:sreg_32 = S_ADD_U32 128, %0, implicit-def $scc
467+
$sgpr4 = COPY %1
468+
SI_RETURN implicit $sgpr4
469+
...
470+
471+
---
472+
name: fold_frame_index__s_add_u32__fi_inlineimm
473+
tracksRegLiveness: true
474+
frameInfo:
475+
maxAlignment: 4
476+
localFrameSize: 16384
477+
stack:
478+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
479+
body: |
480+
bb.0:
481+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__fi_inlineimm
482+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 %stack.0, 16, implicit-def $scc
483+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
484+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
485+
%0:sreg_32 = S_MOV_B32 %stack.0
486+
%1:sreg_32 = S_ADD_U32 %0, 16, implicit-def $scc
487+
$sgpr4 = COPY %1
488+
SI_RETURN implicit $sgpr4
489+
...
490+
491+
---
492+
name: fold_frame_index__s_add_u32__inlineimm_fi
493+
tracksRegLiveness: true
494+
frameInfo:
495+
maxAlignment: 4
496+
localFrameSize: 16384
497+
stack:
498+
- { id: 0, size: 16384, alignment: 4, local-offset: 0 }
499+
body: |
500+
bb.0:
501+
; CHECK-LABEL: name: fold_frame_index__s_add_u32__inlineimm_fi
502+
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 16, %stack.0, implicit-def $scc
503+
; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_U32_]]
504+
; CHECK-NEXT: SI_RETURN implicit $sgpr4
505+
%0:sreg_32 = S_MOV_B32 %stack.0
506+
%1:sreg_32 = S_ADD_U32 16, %0, implicit-def $scc
507+
$sgpr4 = COPY %1
508+
SI_RETURN implicit $sgpr4
509+
...
510+
511+
---
512+
name: no_fold_literal_and_fi_s_or_b32
513+
tracksRegLiveness: true
514+
frameInfo:
515+
maxAlignment: 16
516+
localFrameSize: 8192
517+
stack:
518+
- { id: 0, size: 4096, alignment: 4, local-offset: 0 }
519+
- { id: 1, size: 4096, alignment: 16, local-offset: 4096 }
520+
body: |
521+
bb.0:
522+
; CHECK-LABEL: name: no_fold_literal_and_fi_s_or_b32
523+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
524+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[S_MOV_B32_]], 12345, implicit-def dead $scc
525+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
526+
%0:sreg_32 = S_MOV_B32 12345
527+
%1:sreg_32 = S_MOV_B32 %stack.1
528+
%2:sreg_32 = S_AND_B32 killed %1, killed %0, implicit-def dead $scc
529+
S_ENDPGM 0, implicit %2
530+
531+
...
532+
533+
---
534+
name: no_fold_literal_or_fi_s_or_b32
535+
tracksRegLiveness: true
536+
frameInfo:
537+
maxAlignment: 16
538+
localFrameSize: 8192
539+
stack:
540+
- { id: 0, size: 4096, alignment: 4, local-offset: 0 }
541+
- { id: 1, size: 4096, alignment: 16, local-offset: 4096 }
542+
body: |
543+
bb.0:
544+
; CHECK-LABEL: name: no_fold_literal_or_fi_s_or_b32
545+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
546+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_MOV_B32_]], 12345, implicit-def dead $scc
547+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_OR_B32_]]
548+
%0:sreg_32 = S_MOV_B32 12345
549+
%1:sreg_32 = S_MOV_B32 %stack.1
550+
%2:sreg_32 = S_OR_B32 killed %1, killed %0, implicit-def dead $scc
551+
S_ENDPGM 0, implicit %2
552+
553+
...
554+
555+
---
556+
name: no_fold_literal_and_fi_s_mul_i32
557+
tracksRegLiveness: true
558+
frameInfo:
559+
maxAlignment: 16
560+
localFrameSize: 8192
561+
stack:
562+
- { id: 0, size: 4096, alignment: 4, local-offset: 0 }
563+
- { id: 1, size: 4096, alignment: 16, local-offset: 4096 }
564+
body: |
565+
bb.0:
566+
; CHECK-LABEL: name: no_fold_literal_and_fi_s_mul_i32
567+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
568+
; CHECK-NEXT: [[S_MUL_I32_:%[0-9]+]]:sreg_32 = S_MUL_I32 killed [[S_MOV_B32_]], 12345, implicit-def dead $scc
569+
; CHECK-NEXT: S_ENDPGM 0, implicit [[S_MUL_I32_]]
570+
%0:sreg_32 = S_MOV_B32 12345
571+
%1:sreg_32 = S_MOV_B32 %stack.1
572+
%2:sreg_32 = S_MUL_I32 killed %1, killed %0, implicit-def dead $scc
573+
S_ENDPGM 0, implicit %2
574+
429575
...

llvm/test/CodeGen/AMDGPU/fold-operands-s-add-copy-to-vgpr.mir

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,10 @@ stack:
394394
body: |
395395
bb.0:
396396
; CHECK-LABEL: name: fold_s_or_b32__mov_fi_const_copy_to_virt_vgpr
397-
; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 128, %stack.0, implicit $exec
398-
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e32_]]
397+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
398+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_MOV_B32_]], 128, implicit-def dead $scc
399+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
400+
; CHECK-NEXT: SI_RETURN implicit [[COPY]]
399401
%0:sreg_32 = S_MOV_B32 %stack.0
400402
%1:sreg_32 = S_OR_B32 %0, 128, implicit-def dead $scc
401403
%2:vgpr_32 = COPY %1
@@ -410,8 +412,10 @@ stack:
410412
body: |
411413
bb.0:
412414
; CHECK-LABEL: name: fold_s_or_b32__const_copy_mov_fi_to_virt_vgpr
413-
; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 128, %stack.0, implicit $exec
414-
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e32_]]
415+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
416+
; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 128, [[S_MOV_B32_]], implicit-def dead $scc
417+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
418+
; CHECK-NEXT: SI_RETURN implicit [[COPY]]
415419
%0:sreg_32 = S_MOV_B32 %stack.0
416420
%1:sreg_32 = S_OR_B32 128, %0, implicit-def dead $scc
417421
%2:vgpr_32 = COPY %1
@@ -426,8 +430,8 @@ stack:
426430
body: |
427431
bb.0:
428432
; CHECK-LABEL: name: fold_s_or_b32__fi_imm_copy_to_virt_vgpr
429-
; CHECK: %1:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
430-
; CHECK-NEXT: SI_RETURN implicit %1
433+
; CHECK: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
434+
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e64_]]
431435
%0:sreg_32 = disjoint S_OR_B32 %stack.0, 64, implicit-def dead $scc
432436
%1:vgpr_32 = COPY %0
433437
SI_RETURN implicit %1
@@ -441,8 +445,8 @@ stack:
441445
body: |
442446
bb.0:
443447
; CHECK-LABEL: name: fold_s_or_b32__imm_fi_copy_to_virt_vgpr
444-
; CHECK: %1:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
445-
; CHECK-NEXT: SI_RETURN implicit %1
448+
; CHECK: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
449+
; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e64_]]
446450
%0:sreg_32 = disjoint S_OR_B32 64, %stack.0, implicit-def dead $scc
447451
%1:vgpr_32 = COPY %0
448452
SI_RETURN implicit %1
@@ -521,8 +525,10 @@ stack:
521525
body: |
522526
bb.0:
523527
; CHECK-LABEL: name: fold_s_and_b32__mov_fi_const_copy_to_virt_vgpr
524-
; CHECK: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 128, %stack.0, implicit $exec
525-
; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e32_]]
528+
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
529+
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 128, implicit-def dead $scc
530+
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]]
531+
; CHECK-NEXT: SI_RETURN implicit [[COPY]]
526532
%0:sreg_32 = S_MOV_B32 %stack.0
527533
%1:sreg_32 = S_AND_B32 %0, 128, implicit-def dead $scc
528534
%2:vgpr_32 = COPY %1

llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,4 +374,46 @@ vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i
374374
ret void
375375
}
376376

377+
; GCN-LABEL: {{^}}fi_sop2_and_literal_error:
378+
; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1fe00
379+
define amdgpu_kernel void @fi_sop2_and_literal_error() #0 {
380+
entry:
381+
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
382+
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
383+
%p2i = ptrtoint ptr addrspace(5) %Total3.i.i to i32
384+
br label %.shuffle.then.i.i.i.i
385+
386+
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
387+
store i64 0, ptr addrspace(5) null, align 4
388+
%or = and i32 %p2i, -512
389+
%icmp = icmp ugt i32 %or, 9999999
390+
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
391+
392+
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
393+
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
394+
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
395+
ret void
396+
}
397+
398+
; GCN-LABEL: {{^}}fi_sop2_or_literal_error:
399+
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x3039
400+
define amdgpu_kernel void @fi_sop2_or_literal_error() #0 {
401+
entry:
402+
%.omp.reduction.element.i.i.i.i = alloca [1024 x i32], align 4, addrspace(5)
403+
%Total3.i.i = alloca [1024 x i32], align 16, addrspace(5)
404+
%p2i = ptrtoint ptr addrspace(5) %Total3.i.i to i32
405+
br label %.shuffle.then.i.i.i.i
406+
407+
.shuffle.then.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i, %entry
408+
store i64 0, ptr addrspace(5) null, align 4
409+
%or = or i32 %p2i, 12345
410+
%icmp = icmp ugt i32 %or, 9999999
411+
br i1 %icmp, label %.shuffle.then.i.i.i.i, label %vector.body.i.i.i.i
412+
413+
vector.body.i.i.i.i: ; preds = %.shuffle.then.i.i.i.i
414+
%wide.load9.i.i.i.i = load <2 x i32>, ptr addrspace(5) %.omp.reduction.element.i.i.i.i, align 4
415+
store <2 x i32> %wide.load9.i.i.i.i, ptr addrspace(5) null, align 4
416+
ret void
417+
}
418+
377419
attributes #0 = { nounwind }

llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,10 @@
77
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SCRATCH2048K %s
88

99
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
10-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
11-
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xfffc, [[FI]]
12-
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
10+
; GCN: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
11+
; GCN: s_and_b32 s{{[0-9]+}}, [[FI]], 0xfffc
12+
; GCN: v_mov_b32_e32 [[VFI:v[0-9]+]], [[FI]]{{$}}
13+
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}], [[VFI]]
1314
define amdgpu_kernel void @scratch_buffer_known_high_masklo16() {
1415
%alloca = alloca i32, align 4, addrspace(5)
1516
store volatile i32 15, ptr addrspace(5) %alloca
@@ -20,11 +21,15 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo16() {
2021
}
2122

2223
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
23-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
24-
; SCRATCH128K-NOT: v_and_b32
25-
; SCRATCH256K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]]
26-
; SCRATCH1024K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]]
27-
; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0x1fffc, [[FI]]
24+
; SCRATCH256K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
25+
; SCRATCH256K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x1fffc
26+
27+
; SCRATCH1024K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
28+
; SCRATCH1024K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x1fffc
29+
30+
; SCRATCH2048K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
31+
; SCRATCH2048K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x1fffc
32+
2833
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
2934
define amdgpu_kernel void @scratch_buffer_known_high_masklo17() {
3035
%alloca = alloca i32, align 4, addrspace(5)
@@ -36,11 +41,17 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo17() {
3641
}
3742

3843
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo18:
39-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
40-
; SCRATCH128K-NOT: v_and_b32
41-
; SCRATCH256K-NOT: v_and_b32
42-
; SCRATCH1024K: v_and_b32_e32 v{{[0-9]+}}, 0x3fffc, [[FI]]
43-
; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0x3fffc, [[FI]]
44+
; SCRATCH128K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
45+
; SCRATCH256K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
46+
; SCRATCH128K-NOT: and_b32
47+
; SCRATCH256K-NOT: and_b32
48+
49+
; SCRATCH1024K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
50+
; SCRATCH1024K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x3fffc
51+
52+
; SCRATCH2048K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
53+
; SCRATCH2048K: s_and_b32 s{{[0-9]+}}, [[FI]], 0x3fffc
54+
4455
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
4556
define amdgpu_kernel void @scratch_buffer_known_high_masklo18() {
4657
%alloca = alloca i32, align 4, addrspace(5)
@@ -52,11 +63,16 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo18() {
5263
}
5364

5465
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo20:
55-
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
56-
; SCRATCH128K-NOT: v_and_b32
57-
; SCRATCH256K-NOT: v_and_b32
58-
; SCRATCH1024K-NOT: v_and_b32
59-
; SCRATCH2048K: v_and_b32_e32 v{{[0-9]+}}, 0xffffc, [[FI]]
66+
; SCRATCH128K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
67+
; SCRATCH256K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
68+
; SCRATCH1024K: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
69+
70+
; SCRATCH128K-NOT: and_b32
71+
; SCRATCH256K-NOT: and_b32
72+
; SCRATCH1024K-NOT: and_b32
73+
74+
; SCRATCH2048K: s_mov_b32 [[FI:s[0-9]+]], 0{{$}}
75+
; SCRATCH2048K: s_and_b32 s{{[0-9]+}}, [[FI]], 0xffffc
6076
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
6177
define amdgpu_kernel void @scratch_buffer_known_high_masklo20() {
6278
%alloca = alloca i32, align 4, addrspace(5)
@@ -69,7 +85,7 @@ define amdgpu_kernel void @scratch_buffer_known_high_masklo20() {
6985

7086
; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo21:
7187
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
72-
; GCN-NOT: v_and_b32
88+
; GCN-NOT: and_b32
7389
; GCN: {{flat|global}}_store_{{dword|b32}} v[{{[0-9]+:[0-9]+}}],
7490
define amdgpu_kernel void @scratch_buffer_known_high_masklo21() {
7591
%alloca = alloca i32, align 4, addrspace(5)

0 commit comments

Comments
 (0)