Skip to content

Commit 145549f

Browse files
author
Jessica Paquette
committed
[GlobalISel] Combine (x + 0) -> x, G_PTR_ADD edition
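Add G_PTR_ADD to the right_identity_zero combine rule, so that a pointer add with a constant-zero offset folds to its base pointer. Differential Revision: https://reviews.llvm.org/D96621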
1 parent 0c4935b commit 145549f

File tree

3 files changed

+51
-43
lines changed

3 files changed

+51
-43
lines changed

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,8 @@ def select_constant_cmp: GICombineRule<
267267
// Fold x op 0 -> x
268268
def right_identity_zero: GICombineRule<
269269
(defs root:$root),
270-
(match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR):$root,
270+
(match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR,
271+
G_PTR_ADD):$root,
271272
[{ return Helper.matchConstantOp(${root}->getOperand(2), 0); }]),
272273
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
273274
>;

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,3 +307,21 @@ body: |
307307
$x0 = COPY %mul(s64)
308308
RET_ReallyLR implicit $x0
309309
...
310+
---
311+
name: right_ident_ptr_add
312+
tracksRegLiveness: true
313+
body: |
314+
bb.1.entry:
315+
liveins: $x0
316+
; Fold (x + 0) -> x
317+
;
318+
; CHECK-LABEL: name: right_ident_ptr_add
319+
; CHECK: liveins: $x0
320+
; CHECK: %x:_(p0) = COPY $x0
321+
; CHECK: $x0 = COPY %x(p0)
322+
; CHECK: RET_ReallyLR implicit $x0
323+
%x:_(p0) = COPY $x0
324+
%cst:_(s64) = G_CONSTANT i64 0
325+
%op:_(p0) = G_PTR_ADD %x(p0), %cst
326+
$x0 = COPY %op(p0)
327+
RET_ReallyLR implicit $x0

llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll

Lines changed: 31 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -178,18 +178,18 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
178178
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
179179
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5
180180
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
181-
; GFX9-NEXT: s_add_u32 s2, 4, 0
182-
; GFX9-NEXT: v_mov_b32_e32 v0, 15
183-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
181+
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
182+
; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
183+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
184184
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
185185
; GFX9-NEXT: s_and_b32 s0, s0, 15
186186
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
187-
; GFX9-NEXT: s_add_u32 s1, 0x104, s1
188-
; GFX9-NEXT: scratch_load_dword v1, off, s2 glc
189187
; GFX9-NEXT: s_waitcnt vmcnt(0)
190-
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
188+
; GFX9-NEXT: v_mov_b32_e32 v0, 15
189+
; GFX9-NEXT: s_add_u32 s1, 0x104, s1
191190
; GFX9-NEXT: scratch_store_dword off, v0, s1
192191
; GFX9-NEXT: s_waitcnt vmcnt(0)
192+
; GFX9-NEXT: s_add_u32 s0, 0x104, s0
193193
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
194194
; GFX9-NEXT: s_waitcnt vmcnt(0)
195195
; GFX9-NEXT: s_endpgm
@@ -201,8 +201,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
201201
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
202202
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
203203
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
204-
; GFX10-NEXT: s_add_u32 s1, 4, 0
205-
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
204+
; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc
206205
; GFX10-NEXT: s_waitcnt vmcnt(0)
207206
; GFX10-NEXT: v_mov_b32_e32 v0, 15
208207
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -237,8 +236,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
237236
; GFX9: ; %bb.0: ; %bb
238237
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
239238
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
240-
; GFX9-NEXT: s_add_u32 s0, 4, 0
241-
; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
239+
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
240+
; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc
242241
; GFX9-NEXT: s_waitcnt vmcnt(0)
243242
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
244243
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
@@ -263,11 +262,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
263262
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
264263
; GFX10-NEXT: v_mov_b32_e32 v2, 0x104
265264
; GFX10-NEXT: v_mov_b32_e32 v3, 15
266-
; GFX10-NEXT: s_add_u32 s0, 4, 0
267265
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
268266
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
269267
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
270-
; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
268+
; GFX10-NEXT: scratch_load_dword v2, off, off offset:4 glc dlc
271269
; GFX10-NEXT: s_waitcnt vmcnt(0)
272270
; GFX10-NEXT: scratch_store_dword v0, v3, off
273271
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -296,8 +294,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
296294
; GFX9-LABEL: store_load_vindex_small_offset_foo:
297295
; GFX9: ; %bb.0: ; %bb
298296
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299-
; GFX9-NEXT: s_add_u32 s0, s32, 0
300-
; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
297+
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
301298
; GFX9-NEXT: s_waitcnt vmcnt(0)
302299
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100
303300
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
@@ -323,10 +320,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
323320
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
324321
; GFX10-NEXT: v_mov_b32_e32 v3, 15
325322
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
326-
; GFX10-NEXT: s_add_u32 s0, s32, 0
327323
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
328324
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
329-
; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
325+
; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
330326
; GFX10-NEXT: s_waitcnt vmcnt(0)
331327
; GFX10-NEXT: scratch_store_dword v0, v3, off
332328
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -355,18 +351,18 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
355351
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
356352
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5
357353
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
358-
; GFX9-NEXT: s_add_u32 s2, 4, 0
359-
; GFX9-NEXT: v_mov_b32_e32 v0, 15
360-
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
354+
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
355+
; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
356+
; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
361357
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
362358
; GFX9-NEXT: s_and_b32 s0, s0, 15
363359
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
364-
; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
365-
; GFX9-NEXT: scratch_load_dword v1, off, s2 glc
366360
; GFX9-NEXT: s_waitcnt vmcnt(0)
367-
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
361+
; GFX9-NEXT: v_mov_b32_e32 v0, 15
362+
; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
368363
; GFX9-NEXT: scratch_store_dword off, v0, s1
369364
; GFX9-NEXT: s_waitcnt vmcnt(0)
365+
; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
370366
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
371367
; GFX9-NEXT: s_waitcnt vmcnt(0)
372368
; GFX9-NEXT: s_endpgm
@@ -378,8 +374,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
378374
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
379375
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
380376
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
381-
; GFX10-NEXT: s_add_u32 s1, 4, 0
382-
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
377+
; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc
383378
; GFX10-NEXT: s_waitcnt vmcnt(0)
384379
; GFX10-NEXT: v_mov_b32_e32 v0, 15
385380
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -414,8 +409,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
414409
; GFX9: ; %bb.0: ; %bb
415410
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
416411
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
417-
; GFX9-NEXT: s_add_u32 s0, 4, 0
418-
; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
412+
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
413+
; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc
419414
; GFX9-NEXT: s_waitcnt vmcnt(0)
420415
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
421416
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
@@ -440,11 +435,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
440435
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
441436
; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004
442437
; GFX10-NEXT: v_mov_b32_e32 v3, 15
443-
; GFX10-NEXT: s_add_u32 s0, 4, 0
444438
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
445439
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
446440
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
447-
; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
441+
; GFX10-NEXT: scratch_load_dword v2, off, off offset:4 glc dlc
448442
; GFX10-NEXT: s_waitcnt vmcnt(0)
449443
; GFX10-NEXT: scratch_store_dword v0, v3, off
450444
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -473,8 +467,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
473467
; GFX9-LABEL: store_load_vindex_large_offset_foo:
474468
; GFX9: ; %bb.0: ; %bb
475469
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476-
; GFX9-NEXT: s_add_u32 s0, s32, 0
477-
; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
470+
; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
478471
; GFX9-NEXT: s_waitcnt vmcnt(0)
479472
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
480473
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
@@ -500,10 +493,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
500493
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
501494
; GFX10-NEXT: v_mov_b32_e32 v3, 15
502495
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
503-
; GFX10-NEXT: s_add_u32 s0, s32, 0
504496
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
505497
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
506-
; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
498+
; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
507499
; GFX10-NEXT: s_waitcnt vmcnt(0)
508500
; GFX10-NEXT: scratch_store_dword v0, v3, off
509501
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -531,11 +523,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
531523
; GFX9: ; %bb.0: ; %bb
532524
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
533525
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
526+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
534527
; GFX9-NEXT: v_mov_b32_e32 v0, 13
535-
; GFX9-NEXT: s_add_u32 s0, 4, 0
536-
; GFX9-NEXT: scratch_store_dword off, v0, s0
528+
; GFX9-NEXT: s_mov_b32 vcc_hi, 0
529+
; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
537530
; GFX9-NEXT: s_waitcnt vmcnt(0)
538-
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
539531
; GFX9-NEXT: v_mov_b32_e32 v0, 15
540532
; GFX9-NEXT: s_add_u32 s0, 4, s0
541533
; GFX9-NEXT: scratch_store_dword off, v0, s0
@@ -553,9 +545,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
553545
; GFX10-NEXT: v_mov_b32_e32 v0, 13
554546
; GFX10-NEXT: v_mov_b32_e32 v1, 15
555547
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
556-
; GFX10-NEXT: s_add_u32 s1, 4, 0
557548
; GFX10-NEXT: s_add_u32 s0, 4, s0
558-
; GFX10-NEXT: scratch_store_dword off, v0, s1
549+
; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
559550
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
560551
; GFX10-NEXT: scratch_store_dword off, v1, s0
561552
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -577,11 +568,10 @@ define void @store_load_large_imm_offset_foo() {
577568
; GFX9-LABEL: store_load_large_imm_offset_foo:
578569
; GFX9: ; %bb.0: ; %bb
579570
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571+
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
580572
; GFX9-NEXT: v_mov_b32_e32 v0, 13
581-
; GFX9-NEXT: s_add_u32 s0, s32, 0
582-
; GFX9-NEXT: scratch_store_dword off, v0, s0
573+
; GFX9-NEXT: scratch_store_dword off, v0, s32
583574
; GFX9-NEXT: s_waitcnt vmcnt(0)
584-
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
585575
; GFX9-NEXT: v_mov_b32_e32 v0, 15
586576
; GFX9-NEXT: s_add_u32 s0, s32, s0
587577
; GFX9-NEXT: scratch_store_dword off, v0, s0
@@ -597,9 +587,8 @@ define void @store_load_large_imm_offset_foo() {
597587
; GFX10-NEXT: v_mov_b32_e32 v0, 13
598588
; GFX10-NEXT: v_mov_b32_e32 v1, 15
599589
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
600-
; GFX10-NEXT: s_add_u32 s1, s32, 0
601590
; GFX10-NEXT: s_add_u32 s0, s32, s0
602-
; GFX10-NEXT: scratch_store_dword off, v0, s1
591+
; GFX10-NEXT: scratch_store_dword off, v0, s32
603592
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
604593
; GFX10-NEXT: scratch_store_dword off, v1, s0
605594
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0

0 commit comments

Comments
 (0)