Skip to content

Commit a7fed73

Browse files
committed
Freeze hi-half since it will be used twice
Signed-off-by: John Lu <[email protected]>
1 parent 00d40a6 commit a7fed73

File tree

6 files changed, with 123 additions and 117 deletions.

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4157,6 +4157,7 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
41574157
// (sra i64:x, C) -> build_pair (sra hi_32(x), C - 32), (sra hi_32(x), 31)
41584158
if (32 <= RHSVal) {
41594159
SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
4160+
Hi = DAG.getFreeze(Hi);
41604161
SDValue HiShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
41614162
DAG.getConstant(31, SL, MVT::i32));
41624163
SDValue LoShift;

llvm/test/CodeGen/AMDGPU/fptoi.i128.ll

Lines changed: 19 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1433,15 +1433,25 @@ define i128 @fptoui_f32_to_i128(float %x) {
14331433
}
14341434

14351435
define i128 @fptosi_f16_to_i128(half %x) {
1436-
; GCN-LABEL: fptosi_f16_to_i128:
1437-
; GCN: ; %bb.0:
1438-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439-
; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0
1440-
; GCN-NEXT: v_cvt_i32_f32_e32 v0, v0
1441-
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1442-
; GCN-NEXT: v_mov_b32_e32 v2, v1
1443-
; GCN-NEXT: v_mov_b32_e32 v3, v1
1444-
; GCN-NEXT: s_setpc_b64 s[30:31]
1436+
; SDAG-LABEL: fptosi_f16_to_i128:
1437+
; SDAG: ; %bb.0:
1438+
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439+
; SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1440+
; SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0
1441+
; SDAG-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1442+
; SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1
1443+
; SDAG-NEXT: v_mov_b32_e32 v3, v2
1444+
; SDAG-NEXT: s_setpc_b64 s[30:31]
1445+
;
1446+
; GISEL-LABEL: fptosi_f16_to_i128:
1447+
; GISEL: ; %bb.0:
1448+
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449+
; GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
1450+
; GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0
1451+
; GISEL-NEXT: v_ashrrev_i32_e32 v1, 31, v0
1452+
; GISEL-NEXT: v_mov_b32_e32 v2, v1
1453+
; GISEL-NEXT: v_mov_b32_e32 v3, v1
1454+
; GISEL-NEXT: s_setpc_b64 s[30:31]
14451455
%cvt = fptosi half %x to i128
14461456
ret i128 %cvt
14471457
}

llvm/test/CodeGen/AMDGPU/load-constant-i16.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1643,15 +1643,15 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %ou
16431643
; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
16441644
; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000
16451645
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
1646-
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s6, s5, 16
1647-
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s7, s4, 16
1646+
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s6, s4, 16
1647+
; GCN-NOHSA-SI-NEXT: s_ashr_i32 s7, s5, 16
16481648
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5
16491649
; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4
16501650
; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1
16511651
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4
1652-
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7
1652+
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6
16531653
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5
1654-
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6
1654+
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7
16551655
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
16561656
; GCN-NOHSA-SI-NEXT: s_endpgm
16571657
;
@@ -1666,14 +1666,14 @@ define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(ptr addrspace(1) %ou
16661666
; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0
16671667
; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1
16681668
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
1669-
; GCN-HSA-NEXT: s_ashr_i32 s0, s3, 16
1670-
; GCN-HSA-NEXT: s_ashr_i32 s1, s2, 16
1669+
; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16
1670+
; GCN-HSA-NEXT: s_ashr_i32 s1, s3, 16
16711671
; GCN-HSA-NEXT: s_sext_i32_i16 s3, s3
16721672
; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2
16731673
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
1674-
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1
1674+
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0
16751675
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3
1676-
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0
1676+
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
16771677
; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
16781678
; GCN-HSA-NEXT: s_endpgm
16791679
;

llvm/test/CodeGen/AMDGPU/load-global-i16.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1738,8 +1738,8 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out,
17381738
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
17391739
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
17401740
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
1741-
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
17421741
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v4
1742+
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v5
17431743
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16
17441744
; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v4, 0, 16
17451745
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
@@ -1758,8 +1758,8 @@ define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(ptr addrspace(1) %out,
17581758
; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0
17591759
; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1
17601760
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
1761-
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5
17621761
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4
1762+
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5
17631763
; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16
17641764
; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16
17651765
; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3]

llvm/test/CodeGen/AMDGPU/mul_int24.ll

Lines changed: 39 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -463,39 +463,41 @@ define amdgpu_kernel void @test_smul24_i64_square(ptr addrspace(1) %out, i32 %a,
463463
define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b) #0 {
464464
; SI-LABEL: test_smul24_i33:
465465
; SI: ; %bb.0: ; %entry
466-
; SI-NEXT: s_load_dword s6, s[4:5], 0xd
467-
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
468-
; SI-NEXT: s_load_dword s4, s[4:5], 0xb
469-
; SI-NEXT: s_mov_b32 s3, 0xf000
470-
; SI-NEXT: s_mov_b32 s2, -1
466+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
467+
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
468+
; SI-NEXT: s_mov_b32 s7, 0xf000
469+
; SI-NEXT: s_mov_b32 s6, -1
471470
; SI-NEXT: s_waitcnt lgkmcnt(0)
472-
; SI-NEXT: s_bfe_i32 s5, s6, 0x180000
473-
; SI-NEXT: s_bfe_i32 s4, s4, 0x180000
474-
; SI-NEXT: v_mov_b32_e32 v0, s5
475-
; SI-NEXT: s_mul_i32 s5, s4, s5
476-
; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s4, v0
477-
; SI-NEXT: v_mov_b32_e32 v0, s5
471+
; SI-NEXT: s_mov_b32 s4, s0
472+
; SI-NEXT: s_mov_b32 s5, s1
473+
; SI-NEXT: s_bfe_i32 s0, s8, 0x180000
474+
; SI-NEXT: s_bfe_i32 s1, s2, 0x180000
475+
; SI-NEXT: v_mov_b32_e32 v0, s0
476+
; SI-NEXT: s_mul_i32 s0, s1, s0
477+
; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s1, v0
478+
; SI-NEXT: v_mov_b32_e32 v0, s0
478479
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
479480
; SI-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
480-
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
481+
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
481482
; SI-NEXT: s_endpgm
482483
;
483484
; VI-LABEL: test_smul24_i33:
484485
; VI: ; %bb.0: ; %entry
485-
; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
486-
; VI-NEXT: s_load_dword s3, s[4:5], 0x34
487-
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
486+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
487+
; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
488+
; VI-NEXT: s_mov_b32 s7, 0xf000
489+
; VI-NEXT: s_mov_b32 s6, -1
488490
; VI-NEXT: s_waitcnt lgkmcnt(0)
489491
; VI-NEXT: s_bfe_i32 s2, s2, 0x180000
490-
; VI-NEXT: s_bfe_i32 s3, s3, 0x180000
492+
; VI-NEXT: s_bfe_i32 s3, s4, 0x180000
491493
; VI-NEXT: v_mov_b32_e32 v0, s3
492494
; VI-NEXT: v_mul_hi_i32_i24_e32 v1, s2, v0
493495
; VI-NEXT: v_mul_i32_i24_e32 v0, s2, v0
494496
; VI-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
495-
; VI-NEXT: s_mov_b32 s3, 0xf000
497+
; VI-NEXT: s_mov_b32 s4, s0
496498
; VI-NEXT: v_ashrrev_i64 v[0:1], 31, v[0:1]
497-
; VI-NEXT: s_mov_b32 s2, -1
498-
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
499+
; VI-NEXT: s_mov_b32 s5, s1
500+
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
499501
; VI-NEXT: s_endpgm
500502
;
501503
; GFX9-LABEL: test_smul24_i33:
@@ -574,30 +576,32 @@ entry:
574576
define amdgpu_kernel void @test_smulhi24_i33(ptr addrspace(1) %out, i33 %a, i33 %b) {
575577
; SI-LABEL: test_smulhi24_i33:
576578
; SI: ; %bb.0: ; %entry
577-
; SI-NEXT: s_load_dword s6, s[4:5], 0xd
578-
; SI-NEXT: s_load_dword s7, s[4:5], 0xb
579-
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
580-
; SI-NEXT: s_mov_b32 s3, 0xf000
581-
; SI-NEXT: s_mov_b32 s2, -1
579+
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
580+
; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
581+
; SI-NEXT: s_mov_b32 s7, 0xf000
582+
; SI-NEXT: s_mov_b32 s6, -1
582583
; SI-NEXT: s_waitcnt lgkmcnt(0)
583-
; SI-NEXT: v_mov_b32_e32 v0, s6
584-
; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s7, v0
584+
; SI-NEXT: s_mov_b32 s4, s0
585+
; SI-NEXT: s_mov_b32 s5, s1
586+
; SI-NEXT: v_mov_b32_e32 v0, s8
587+
; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s2, v0
585588
; SI-NEXT: v_and_b32_e32 v0, 1, v0
586-
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
589+
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
587590
; SI-NEXT: s_endpgm
588591
;
589592
; VI-LABEL: test_smulhi24_i33:
590593
; VI: ; %bb.0: ; %entry
591-
; VI-NEXT: s_load_dword s6, s[4:5], 0x34
592-
; VI-NEXT: s_load_dword s7, s[4:5], 0x2c
593-
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
594-
; VI-NEXT: s_mov_b32 s3, 0xf000
595-
; VI-NEXT: s_mov_b32 s2, -1
594+
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
595+
; VI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x34
596+
; VI-NEXT: s_mov_b32 s7, 0xf000
597+
; VI-NEXT: s_mov_b32 s6, -1
596598
; VI-NEXT: s_waitcnt lgkmcnt(0)
597-
; VI-NEXT: v_mov_b32_e32 v0, s6
598-
; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s7, v0
599+
; VI-NEXT: s_mov_b32 s4, s0
600+
; VI-NEXT: v_mov_b32_e32 v0, s8
601+
; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s2, v0
602+
; VI-NEXT: s_mov_b32 s5, s1
599603
; VI-NEXT: v_and_b32_e32 v0, 1, v0
600-
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
604+
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
601605
; VI-NEXT: s_endpgm
602606
;
603607
; GFX9-LABEL: test_smulhi24_i33:

0 commit comments

Comments
 (0)