Skip to content

Commit 7ef283a

Browse files
Changpeng Fang authored and fhahn committed
DAGCombiner: Don't simplify the token factor if the node's number of operands already exceeds TokenFactorInlineLimit
Summary: In parallelizeChainedStores, a TokenFactor with more than 3000 operands was created. We found that DAGCombiner::visitTokenFactor consumes a huge amount of time on such nodes. Since the number of operands already exceeds TokenFactorInlineLimit, we propose to give up on simplification in the interest of compile time. Reviewers: @spatel, @arsenm Differential Revision: https://reviews.llvm.org/D84204
1 parent 5cd85d0 commit 7ef283a

File tree

2 files changed

+62
-0
lines changed

2 files changed

+62
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1805,6 +1805,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
18051805
if (OptLevel == CodeGenOpt::None)
18061806
return SDValue();
18071807

1808+
// Don't simplify the token factor if the node itself has too many operands.
1809+
if (N->getNumOperands() > TokenFactorInlineLimit)
1810+
return SDValue();
1811+
18081812
// If the sole user is a token factor, we should make sure we have a
18091813
// chance to merge them together. This prevents TF chains from inhibiting
18101814
// optimizations.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-TFILD %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx900 -combiner-tokenfactor-inline-limit=7 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-TFIL7 %s
3+
4+
5+
; GCN-LABEL: {{^}}token_factor_inline_limit_test:
6+
7+
; GCN-TFILD: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
8+
; GCN-TFILD: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
9+
; GCN-TFILD: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
10+
; GCN-TFILD: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
11+
; GCN-TFILD: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
12+
; GCN-TFILD: buffer_store_dword [[REG8]], {{.*$}}
13+
; GCN-TFILD: buffer_store_dword [[REG9]], {{.*}} offset:4
14+
; GCN-TFILD: buffer_store_dword [[REG10]], {{.*}} offset:8
15+
; GCN-TFILD: buffer_store_dword [[REG11]], {{.*}} offset:12
16+
; GCN-TFILD: buffer_store_dword [[REG12]], {{.*}} offset:16
17+
; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
18+
; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
19+
; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
20+
; GCN-TFILD: buffer_store_dword [[REG13]], {{.*}} offset:20
21+
; GCN-TFILD: buffer_store_dword [[REG14]], {{.*}} offset:24
22+
; GCN-TFILD: buffer_store_dword [[REG15]], {{.*}} offset:28
23+
24+
; GCN-TFIL7: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
25+
; GCN-TFIL7: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
26+
; GCN-TFIL7: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
27+
; GCN-TFIL7: buffer_store_dword [[REG15]], {{.*}} offset:28
28+
; GCN-TFIL7: buffer_store_dword [[REG14]], {{.*}} offset:24
29+
; GCN-TFIL7: buffer_store_dword [[REG13]], {{.*}} offset:20
30+
; GCN-TFIL7: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
31+
; GCN-TFIL7: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
32+
; GCN-TFIL7: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
33+
; GCN-TFIL7: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
34+
; GCN-TFIL7: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
35+
; GCN-TFIL7: buffer_store_dword [[REG12]], {{.*}} offset:16
36+
; GCN-TFIL7: buffer_store_dword [[REG11]], {{.*}} offset:12
37+
; GCN-TFIL7: buffer_store_dword [[REG10]], {{.*}} offset:8
38+
; GCN-TFIL7: buffer_store_dword [[REG9]], {{.*}} offset:4
39+
; GCN-TFIL7: buffer_store_dword [[REG8]], {{.*$}}
40+
41+
; GCN: v_mov_b32_e32 v31, 7
42+
; GCN: s_getpc
43+
define void @token_factor_inline_limit_test() {
44+
entry:
45+
call void @external_void_func_8xv5i32(
46+
<5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
47+
<5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
48+
<5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
49+
<5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
50+
<5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
51+
<5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
52+
<5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
53+
<5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
54+
ret void
55+
}
56+
57+
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
58+
<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>)

0 commit comments

Comments
 (0)