
Commit 7f5a128

[llvm][CodeGen] Intrinsic llvm.powi.* code gen for vector arguments
In some backends, the i32 type is illegal and will be promoted. This causes the exponent type check to fail when an ISD::FPOWI node generates a libcall.
1 parent 574f64c commit 7f5a128
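
As a concrete illustration, the affected pattern is a vector llvm.powi call whose exponent is declared i32, compiled for a 64-bit target such as loongarch64 where i32 is not a legal type. The sketch below is a minimal reproducer mirroring the tests added in this commit; the function name powi_example is only illustrative.

; Minimal sketch of the affected pattern (mirrors the new tests below).
; With e.g. `llc --mtriple=loongarch64 --mattr=+lsx`, the i32 exponent is
; promoted to i64 during type legalization before the vector FPOWI node is
; unrolled into per-element __powisf2 libcalls.
declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)

define <4 x float> @powi_example(<4 x float> %v, i32 %exp) nounwind {
entry:
  %r = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %v, i32 %exp)
  ret <4 x float> %r
}

Running this through llc --mtriple=loongarch64 --mattr=+lsx (as in the new tests' RUN lines) should now emit the per-element __powisf2 libcalls instead of tripping the sizeof(int) check.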

File tree

4 files changed: +1675, -0 lines changed


llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 18 additions & 0 deletions
@@ -4648,6 +4648,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     bool ExponentHasSizeOfInt =
         DAG.getLibInfo().getIntSize() ==
         Node->getOperand(1 + Offset).getValueType().getSizeInBits();
+    if (!ExponentHasSizeOfInt) {
+      // In some backends, such as RISCV64 and LoongArch64, the i32 type is
+      // illegal and is promoted by a previous process. For such cases, the
+      // exponent actually matches sizeof(int) and a libcall should be
+      // generated.
+      SDNode *ExponentNode = Node->getOperand(1 + Offset).getNode();
+      unsigned LibIntSize = DAG.getLibInfo().getIntSize();
+      if (ExponentNode->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+          ExponentNode->getOpcode() == ISD::AssertSext ||
+          ExponentNode->getOpcode() == ISD::AssertZext) {
+        EVT InnerType = cast<VTSDNode>(ExponentNode->getOperand(1))->getVT();
+        ExponentHasSizeOfInt = LibIntSize == InnerType.getSizeInBits();
+      } else if (ISD::isExtOpcode(ExponentNode->getOpcode())) {
+        ExponentHasSizeOfInt =
+            LibIntSize ==
+            ExponentNode->getOperand(0).getValueType().getSizeInBits();
+      }
+    }
     if (!ExponentHasSizeOfInt) {
       // If the exponent does not match with sizeof(int) a libcall to
       // RTLIB::POWI would use the wrong type for the argument.
Lines changed: 142 additions & 0 deletions
@@ -0,0 +1,142 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s

declare <8 x float> @llvm.powi.v8f32.i32(<8 x float>, i32)

define <8 x float> @powi_v8f32(<8 x float> %va, i32 %b) nounwind {
; CHECK-LABEL: powi_v8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -80
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
; CHECK-NEXT: movgr2fr.w $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 80
; CHECK-NEXT: ret
entry:
  %res = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> %va, i32 %b)
  ret <8 x float> %res
}

declare <4 x double> @llvm.powi.v4f64.i32(<4 x double>, i32)

define <4 x double> @powi_v4f64(<4 x double> %va, i32 %b) nounwind {
; CHECK-LABEL: powi_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -80
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill
; CHECK-NEXT: xvst $xr0, $sp, 0 # 32-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powidf2)
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powidf2)
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powidf2)
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
; CHECK-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill
; CHECK-NEXT: xvld $xr0, $sp, 0 # 32-byte Folded Reload
; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
; CHECK-NEXT: movgr2fr.d $fa0, $a0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powidf2)
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: xvld $xr0, $sp, 32 # 32-byte Folded Reload
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
; CHECK-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 80
; CHECK-NEXT: ret
entry:
  %res = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> %va, i32 %b)
  ret <4 x double> %res
}
Lines changed: 88 additions & 0 deletions
@@ -0,0 +1,88 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s

declare <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32)

define <4 x float> @powi_v4f32(<4 x float> %va, i32 %b) nounwind {
; CHECK-LABEL: powi_v4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -48
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 1
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 2
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3
; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powisf2)
; CHECK-NEXT: movfr2gr.s $a0, $fa0
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 48
; CHECK-NEXT: ret
entry:
  %res = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %va, i32 %b)
  ret <4 x float> %res
}

declare <2 x double> @llvm.powi.v2f64.i32(<2 x double>, i32)

define <2 x double> @powi_v2f64(<2 x double> %va, i32 %b) nounwind {
; CHECK-LABEL: powi_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: addi.d $sp, $sp, -48
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
; CHECK-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill
; CHECK-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
; CHECK-NEXT: addi.w $fp, $a0, 0
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powidf2)
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill
; CHECK-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0
; CHECK-NEXT: move $a0, $fp
; CHECK-NEXT: bl %plt(__powidf2)
; CHECK-NEXT: movfr2gr.d $a0, $fa0
; CHECK-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 1
; CHECK-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 48
; CHECK-NEXT: ret
entry:
  %res = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> %va, i32 %b)
  ret <2 x double> %res
}
