Skip to content

Commit e6cebd0

Browse files
committed
GlobalISel: fewerElementsVector for more cast types
llvm-svn: 352166
1 parent 95fd95c commit e6cebd0

File tree

6 files changed: 162 additions and 3 deletions.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,6 +1511,11 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
15111511
case TargetOpcode::G_SEXT:
15121512
case TargetOpcode::G_ANYEXT:
15131513
case TargetOpcode::G_FPEXT:
1514+
case TargetOpcode::G_FPTRUNC:
1515+
case TargetOpcode::G_SITOFP:
1516+
case TargetOpcode::G_UITOFP:
1517+
case TargetOpcode::G_FPTOSI:
1518+
case TargetOpcode::G_FPTOUI:
15141519
return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
15151520
}
15161521
}

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
148148
.clampScalar(0, S32, S64);
149149

150150
getActionDefinitionsBuilder(G_FPTRUNC)
151-
.legalFor({{S32, S64}, {S16, S32}});
151+
.legalFor({{S32, S64}, {S16, S32}})
152+
.scalarize(0);
152153

153154
getActionDefinitionsBuilder(G_FPEXT)
154155
.legalFor({{S64, S32}, {S32, S16}})
@@ -171,10 +172,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
171172
.scalarize(0);
172173

173174
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
174-
.legalFor({{S32, S32}, {S64, S32}});
175+
.legalFor({{S32, S32}, {S64, S32}})
176+
.scalarize(0);
175177

176178
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
177-
.legalFor({{S32, S32}, {S32, S64}});
179+
.legalFor({{S32, S32}, {S32, S64}})
180+
.scalarize(0);
178181

179182
getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND})
180183
.legalFor({S32, S64});

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptosi.mir

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ body: |
99
1010
; CHECK-LABEL: name: test_fptosi_s32_to_s32
1111
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
12+
; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32)
13+
; CHECK: $vgpr0 = COPY [[FPTOSI]](s32)
1214
%0:_(s32) = COPY $vgpr0
1315
%1:_(s32) = G_FPTOSI %0
16+
$vgpr0 = COPY %1
1417
...
1518

1619
---
@@ -21,6 +24,45 @@ body: |
2124
2225
; CHECK-LABEL: name: test_fptosi_s64_to_s32
2326
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
27+
; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64)
28+
; CHECK: $vgpr0 = COPY [[FPTOSI]](s32)
2429
%0:_(s64) = COPY $vgpr0_vgpr1
2530
%1:_(s32) = G_FPTOSI %0
31+
$vgpr0 = COPY %1
32+
...
33+
34+
---
35+
name: test_fptosi_v2s32_to_v2s32
36+
body: |
37+
bb.0:
38+
liveins: $vgpr0_vgpr1
39+
40+
; CHECK-LABEL: name: test_fptosi_v2s32_to_v2s32
41+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
42+
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
43+
; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s32)
44+
; CHECK: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s32)
45+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32)
46+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
47+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
48+
%1:_(<2 x s32>) = G_FPTOSI %0
49+
$vgpr0_vgpr1 = COPY %1
50+
...
51+
52+
---
53+
name: test_fptosi_v2s64_to_v2s32
54+
body: |
55+
bb.0:
56+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
57+
58+
; CHECK-LABEL: name: test_fptosi_v2s64_to_v2s32
59+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
60+
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
61+
; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[UV]](s64)
62+
; CHECK: [[FPTOSI1:%[0-9]+]]:_(s32) = G_FPTOSI [[UV1]](s64)
63+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOSI]](s32), [[FPTOSI1]](s32)
64+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
65+
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
66+
%1:_(<2 x s32>) = G_FPTOSI %0
67+
$vgpr0_vgpr1 = COPY %1
2668
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptoui.mir

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,38 @@ body: |
3131
$vgpr0 = COPY %1
3232
...
3333

34+
---
35+
name: test_fptoui_v2s32_to_v2s32
36+
body: |
37+
bb.0:
38+
liveins: $vgpr0_vgpr1
39+
40+
; CHECK-LABEL: name: test_fptoui_v2s32_to_v2s32
41+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
42+
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
43+
; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s32)
44+
; CHECK: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s32)
45+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32)
46+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
47+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
48+
%1:_(<2 x s32>) = G_FPTOUI %0
49+
$vgpr0_vgpr1 = COPY %1
50+
...
51+
52+
---
53+
name: test_fptoui_v2s64_to_v2s32
54+
body: |
55+
bb.0:
56+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
57+
58+
; CHECK-LABEL: name: test_fptoui_v2s64_to_v2s32
59+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
60+
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
61+
; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[UV]](s64)
62+
; CHECK: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[UV1]](s64)
63+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTOUI]](s32), [[FPTOUI1]](s32)
64+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
65+
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
66+
%1:_(<2 x s32>) = G_FPTOUI %0
67+
$vgpr0_vgpr1 = COPY %1
68+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,42 @@ body: |
3232
%2:_(s32) = G_ANYEXT %1
3333
$vgpr0 = COPY %2
3434
...
35+
36+
---
37+
name: test_fptrunc_v2s64_to_v2s32
38+
body: |
39+
bb.0:
40+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
41+
42+
; CHECK-LABEL: name: test_fptrunc_v2s64_to_v2s32
43+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
44+
; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
45+
; CHECK: [[FPTRUNC:%[0-9]+]]:_(s32) = G_FPTRUNC [[UV]](s64)
46+
; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s32) = G_FPTRUNC [[UV1]](s64)
47+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FPTRUNC]](s32), [[FPTRUNC1]](s32)
48+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
49+
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
50+
%1:_(<2 x s32>) = G_FPTRUNC %0
51+
$vgpr0_vgpr1 = COPY %1
52+
...
53+
54+
---
55+
name: test_fptrunc_v2s32_to_v2s16
56+
body: |
57+
bb.0:
58+
liveins: $vgpr0_vgpr1
59+
60+
; CHECK-LABEL: name: test_fptrunc_v2s32_to_v2s16
61+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
62+
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
63+
; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32)
64+
; CHECK: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV1]](s32)
65+
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16)
66+
; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16)
67+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
68+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
69+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
70+
%1:_(<2 x s16>) = G_FPTRUNC %0
71+
%2:_(<2 x s32>) = G_ANYEXT %1
72+
$vgpr0_vgpr1 = COPY %2
73+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,38 @@ body: |
3131
$vgpr0_vgpr1 = COPY %1
3232
...
3333

34+
---
35+
name: test_sitofp_v2s32_to_v2s32
36+
body: |
37+
bb.0:
38+
liveins: $vgpr0_vgpr1
39+
40+
; CHECK-LABEL: name: test_sitofp_v2s32_to_v2s32
41+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
42+
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
43+
; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[UV]](s32)
44+
; CHECK: [[SITOFP1:%[0-9]+]]:_(s32) = G_SITOFP [[UV1]](s32)
45+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SITOFP]](s32), [[SITOFP1]](s32)
46+
; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
47+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
48+
%1:_(<2 x s32>) = G_SITOFP %0
49+
$vgpr0_vgpr1 = COPY %1
50+
...
51+
52+
---
53+
name: test_sitofp_v2s32_to_v2s64
54+
body: |
55+
bb.0:
56+
liveins: $vgpr0_vgpr1
57+
58+
; CHECK-LABEL: name: test_sitofp_v2s32_to_v2s64
59+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
60+
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
61+
; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV]](s32)
62+
; CHECK: [[SITOFP1:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32)
63+
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SITOFP]](s64), [[SITOFP1]](s64)
64+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
65+
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
66+
%1:_(<2 x s64>) = G_SITOFP %0
67+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
68+
...

0 commit comments

Comments
 (0)