Skip to content

Commit 7576732

Browse files
committed
[TargetLowering] Add a new function getNullPtrValue
In most cases, `nullptr` is a zero-value constant of the corresponding pointer type. However, this is not always the case. For example, AMDGPU uses `0xffffffff` as the null pointer value for address spaces 3 and 5 (AS3 and AS5), which leads to lowering issues because a null pointer constant is currently always lowered as zero. Currently, to ensure correct lowering, `ptr addrspace(5) null` must be written as `addrspacecast (ptr null to ptr addrspace(5))`. This PR introduces `TargetLowering::getNullPtrValue` so that a target can report the correct null pointer value for a given address space. This allows `ConstantPointerNull`, which already carries the correct address space, to be lowered properly. Fixes #115083.
1 parent 595195e commit 7576732

31 files changed

+459
-298
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5659,6 +5659,13 @@ class TargetLowering : public TargetLoweringBase {
56595659
LoadSDNode *OriginalLoad,
56605660
SelectionDAG &DAG) const;
56615661

5662+
/// Return the value of nullptr. In most cases, nullptr is a zero-value
5663+
/// constant with the corresponding pointer type. However, this is not always
5664+
/// the case. For certain address spaces on some targets, it could be a value
5665+
/// like ~0U.
///
/// \param AS address space of the pointer whose null value is requested.
/// \param DL location (SDLoc) passed to the constant node that is created.
/// \param DAG SelectionDAG used to create the constant.
/// \returns a constant of the pointer type for \p AS holding the null value.
5666+
virtual SDValue getNullPtrValue(unsigned AS, const SDLoc &DL,
5667+
SelectionDAG &DAG) const;
5668+
56625669
private:
56635670
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
56645671
const SDLoc &DL, DAGCombinerInfo &DCI) const;

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1807,8 +1807,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
18071807

18081808
if (isa<ConstantPointerNull>(C)) {
18091809
unsigned AS = V->getType()->getPointerAddressSpace();
1810-
return DAG.getConstant(0, getCurSDLoc(),
1811-
TLI.getPointerTy(DAG.getDataLayout(), AS));
1810+
return TLI.getNullPtrValue(AS, getCurSDLoc(), DAG);
18121811
}
18131812

18141813
if (match(C, m_VScale()))

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12188,3 +12188,8 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
1218812188

1218912189
return Load;
1219012190
}
12191+
12192+
/// Default implementation: the null pointer is the constant 0 of the pointer
/// type that getPointerTy reports for address space \p AS under the DAG's data
/// layout. The hook is virtual so that a target whose null pointer is not zero
/// in some address space can override it.
SDValue TargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL,
12193+
SelectionDAG &DAG) const {
12194+
return DAG.getConstant(0, DL, getPointerTy(DAG.getDataLayout(), AS));
12195+
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6050,3 +6050,12 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
60506050
Register N0, Register N1) const {
60516051
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
60526052
}
6053+
6054+
/// AMDGPU override of TargetLowering::getNullPtrValue. For the private and
/// local address spaces the null pointer is the constant 0xffffffff rather
/// than zero; every other address space defers to the generic implementation.
/// NOTE(review): confirm whether any other AMDGPU address space also uses a
/// non-zero null value and needs to be listed here.
SDValue AMDGPUTargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL,
6055+
SelectionDAG &DAG) const {
6056+
if (AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::LOCAL_ADDRESS) {
6057+
return DAG.getConstant(0xffffffff, DL,
6058+
getPointerTy(DAG.getDataLayout(), AS));
6059+
}
6060+
return TargetLowering::getNullPtrValue(AS, DL, DAG);
6061+
}

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,9 @@ class AMDGPUTargetLowering : public TargetLowering {
387387
/// Fence operands use MVT::i32 here; the data layout \p DL is not consulted.
MVT getFenceOperandTy(const DataLayout &DL) const override {
388388
return MVT::i32;
389389
}
390+
391+
/// Return the null pointer value for address space \p AS as a constant node
/// built in \p DAG at location \p DL. Declared as an override of the base
/// class hook of the same name.
SDValue getNullPtrValue(unsigned AS, const SDLoc &DL,
392+
SelectionDAG &DAG) const override;
390393
};
391394

392395
namespace AMDGPUISD {

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 37 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -521,11 +521,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
521521
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
522522
; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3
523523
; GFX908-NEXT: s_sub_i32 s8, 0, s3
524-
; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s7
525-
; GFX908-NEXT: v_mov_b32_e32 v19, 0
524+
; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
525+
; GFX908-NEXT: v_mov_b32_e32 v17, 0
526526
; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
527527
; GFX908-NEXT: v_mov_b32_e32 v0, 0
528528
; GFX908-NEXT: v_mov_b32_e32 v1, 0
529+
; GFX908-NEXT: v_mov_b32_e32 v20, -1
529530
; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
530531
; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2
531532
; GFX908-NEXT: v_readfirstlane_b32 s10, v2
@@ -544,7 +545,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
544545
; GFX908-NEXT: s_cmp_ge_u32 s2, s3
545546
; GFX908-NEXT: s_cselect_b32 s8, s10, s8
546547
; GFX908-NEXT: s_lshr_b32 s7, s7, 16
547-
; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
548+
; GFX908-NEXT: v_cvt_f32_f16_e32 v19, s7
548549
; GFX908-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
549550
; GFX908-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
550551
; GFX908-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
@@ -611,37 +612,37 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
611612
; GFX908-NEXT: ; => This Inner Loop Header: Depth=2
612613
; GFX908-NEXT: s_add_u32 s20, s18, s7
613614
; GFX908-NEXT: s_addc_u32 s21, s19, s9
614-
; GFX908-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
615+
; GFX908-NEXT: global_load_dword v22, v17, s[20:21] offset:-12 glc
615616
; GFX908-NEXT: s_waitcnt vmcnt(0)
616-
; GFX908-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
617+
; GFX908-NEXT: global_load_dword v21, v17, s[20:21] offset:-8 glc
617618
; GFX908-NEXT: s_waitcnt vmcnt(0)
618-
; GFX908-NEXT: global_load_dword v12, v19, s[20:21] offset:-4 glc
619+
; GFX908-NEXT: global_load_dword v12, v17, s[20:21] offset:-4 glc
619620
; GFX908-NEXT: s_waitcnt vmcnt(0)
620-
; GFX908-NEXT: global_load_dword v12, v19, s[20:21] glc
621+
; GFX908-NEXT: global_load_dword v12, v17, s[20:21] glc
621622
; GFX908-NEXT: s_waitcnt vmcnt(0)
622-
; GFX908-NEXT: ds_read_b64 v[12:13], v19
623+
; GFX908-NEXT: ds_read_b64 v[12:13], v20
623624
; GFX908-NEXT: ds_read_b64 v[14:15], v0
624625
; GFX908-NEXT: s_and_b64 vcc, exec, s[0:1]
625626
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
626627
; GFX908-NEXT: s_cbranch_vccnz .LBB3_7
627628
; GFX908-NEXT: ; %bb.6: ; %bb51
628629
; GFX908-NEXT: ; in Loop: Header=BB3_5 Depth=2
629-
; GFX908-NEXT: v_cvt_f32_f16_sdwa v22, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
630+
; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
631+
; GFX908-NEXT: v_cvt_f32_f16_e32 v22, v22
632+
; GFX908-NEXT: v_cvt_f32_f16_sdwa v24, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
630633
; GFX908-NEXT: v_cvt_f32_f16_e32 v21, v21
631-
; GFX908-NEXT: v_cvt_f32_f16_sdwa v23, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
632-
; GFX908-NEXT: v_cvt_f32_f16_e32 v20, v20
633-
; GFX908-NEXT: v_add_f32_e32 v24, v17, v12
634-
; GFX908-NEXT: v_add_f32_e32 v25, v18, v13
635-
; GFX908-NEXT: v_add_f32_e32 v26, 0, v12
636-
; GFX908-NEXT: v_add_f32_e32 v27, 0, v13
637-
; GFX908-NEXT: v_add_f32_e32 v15, v22, v15
638-
; GFX908-NEXT: v_add_f32_e32 v14, v21, v14
639-
; GFX908-NEXT: v_add_f32_e32 v13, v23, v13
640-
; GFX908-NEXT: v_add_f32_e32 v12, v20, v12
641-
; GFX908-NEXT: v_add_f32_e32 v5, v5, v25
642-
; GFX908-NEXT: v_add_f32_e32 v4, v4, v24
643-
; GFX908-NEXT: v_add_f32_e32 v7, v7, v27
644-
; GFX908-NEXT: v_add_f32_e32 v6, v6, v26
634+
; GFX908-NEXT: v_add_f32_e32 v25, v18, v12
635+
; GFX908-NEXT: v_add_f32_e32 v26, v19, v13
636+
; GFX908-NEXT: v_add_f32_e32 v27, 0, v12
637+
; GFX908-NEXT: v_add_f32_e32 v28, 0, v13
638+
; GFX908-NEXT: v_add_f32_e32 v15, v23, v15
639+
; GFX908-NEXT: v_add_f32_e32 v14, v22, v14
640+
; GFX908-NEXT: v_add_f32_e32 v13, v24, v13
641+
; GFX908-NEXT: v_add_f32_e32 v12, v21, v12
642+
; GFX908-NEXT: v_add_f32_e32 v5, v5, v26
643+
; GFX908-NEXT: v_add_f32_e32 v4, v4, v25
644+
; GFX908-NEXT: v_add_f32_e32 v7, v7, v28
645+
; GFX908-NEXT: v_add_f32_e32 v6, v6, v27
645646
; GFX908-NEXT: v_add_f32_e32 v8, v8, v14
646647
; GFX908-NEXT: v_add_f32_e32 v9, v9, v15
647648
; GFX908-NEXT: v_add_f32_e32 v10, v10, v12
@@ -686,6 +687,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
686687
; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3
687688
; GFX90A-NEXT: s_sub_i32 s8, 0, s3
688689
; GFX90A-NEXT: v_mov_b32_e32 v19, 0
690+
; GFX90A-NEXT: v_mov_b32_e32 v20, -1
689691
; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
690692
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
691693
; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
@@ -770,32 +772,32 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
770772
; GFX90A-NEXT: ; => This Inner Loop Header: Depth=2
771773
; GFX90A-NEXT: s_add_u32 s20, s18, s7
772774
; GFX90A-NEXT: s_addc_u32 s21, s19, s9
773-
; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-12 glc
775+
; GFX90A-NEXT: global_load_dword v22, v19, s[20:21] offset:-12 glc
774776
; GFX90A-NEXT: s_waitcnt vmcnt(0)
775-
; GFX90A-NEXT: global_load_dword v20, v19, s[20:21] offset:-8 glc
777+
; GFX90A-NEXT: global_load_dword v21, v19, s[20:21] offset:-8 glc
776778
; GFX90A-NEXT: s_waitcnt vmcnt(0)
777779
; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] offset:-4 glc
778780
; GFX90A-NEXT: s_waitcnt vmcnt(0)
779781
; GFX90A-NEXT: global_load_dword v14, v19, s[20:21] glc
780782
; GFX90A-NEXT: s_waitcnt vmcnt(0)
781-
; GFX90A-NEXT: ds_read_b64 v[14:15], v19
783+
; GFX90A-NEXT: ds_read_b64 v[14:15], v20
782784
; GFX90A-NEXT: ds_read_b64 v[16:17], v0
783785
; GFX90A-NEXT: s_and_b64 vcc, exec, s[0:1]
784786
; GFX90A-NEXT: ; kill: killed $sgpr20 killed $sgpr21
785787
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
786788
; GFX90A-NEXT: s_cbranch_vccnz .LBB3_7
787789
; GFX90A-NEXT: ; %bb.6: ; %bb51
788790
; GFX90A-NEXT: ; in Loop: Header=BB3_5 Depth=2
789-
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v23, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
790-
; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v21
791-
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v21, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
792-
; GFX90A-NEXT: v_cvt_f32_f16_e32 v20, v20
793-
; GFX90A-NEXT: v_pk_add_f32 v[24:25], v[2:3], v[14:15]
794-
; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[14:15], 0 op_sel_hi:[1,0]
791+
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v23, v22 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
792+
; GFX90A-NEXT: v_cvt_f32_f16_e32 v22, v22
793+
; GFX90A-NEXT: v_cvt_f32_f16_sdwa v25, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
794+
; GFX90A-NEXT: v_cvt_f32_f16_e32 v24, v21
795+
; GFX90A-NEXT: v_pk_add_f32 v[26:27], v[2:3], v[14:15]
796+
; GFX90A-NEXT: v_pk_add_f32 v[28:29], v[14:15], 0 op_sel_hi:[1,0]
795797
; GFX90A-NEXT: v_pk_add_f32 v[16:17], v[22:23], v[16:17]
796-
; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[20:21], v[14:15]
797-
; GFX90A-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[24:25]
798-
; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27]
798+
; GFX90A-NEXT: v_pk_add_f32 v[14:15], v[24:25], v[14:15]
799+
; GFX90A-NEXT: v_pk_add_f32 v[6:7], v[6:7], v[26:27]
800+
; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[28:29]
799801
; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17]
800802
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
801803
; GFX90A-NEXT: s_mov_b64 s[20:21], -1

llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,12 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
7373
; CHECK-NEXT: s_mov_b32 s38, s36
7474
; CHECK-NEXT: s_mov_b32 s39, s36
7575
; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
76-
; CHECK-NEXT: v_mov_b32_e32 v0, 0
77-
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
78-
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
79-
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
80-
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
76+
; CHECK-NEXT: v_mov_b32_e32 v0, -1
77+
; CHECK-NEXT: v_mov_b32_e32 v1, 0
78+
; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
79+
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:11
80+
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:7
81+
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:3
8182
; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
8283
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
8384
; CHECK-NEXT: v_mov_b32_e32 v0, s36

0 commit comments

Comments
 (0)