Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit c1d9622

Browse files
committed
Backport r285278 [ARM] Predicate UMAAL selection on hasDSP.
UMAAL is a DSP instruction, so it is not available on thumbv7m (Cortex-M3) or thumbv6m (Cortex-M0, Cortex-M0+, Cortex-M1) targets. Also fix the wrong CHECK prefix in the longMAC.ll test. Patch by Vadzim Dambrouski. Differential Revision: https://reviews.llvm.org/D25890. Conflicts resolved during the backport: lib/Target/ARM/ARMISelLowering.cpp and test/CodeGen/ARM/longMAC.ll.
1 parent ff8ecfc commit c1d9622

File tree

3 files changed

+128
-40
lines changed

3 files changed

+128
-40
lines changed

lib/Target/ARM/ARMISelDAGToDAG.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2971,7 +2971,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
29712971
case ARMISD::UMLAL:{
29722972
// UMAAL is similar to UMLAL but it adds two 32-bit values to the
29732973
// 64-bit multiplication result.
2974-
if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
2974+
if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
2975+
N->getOperand(2).getOpcode() == ARMISD::ADDC &&
29752976
N->getOperand(3).getOpcode() == ARMISD::ADDE) {
29762977

29772978
SDValue Addc = N->getOperand(2);

lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8962,7 +8962,8 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
89628962
// be combined into a UMLAL. The other pattern is AddcNode being combined
89638963
// into an UMLAL and then using another addc is handled in ISelDAGToDAG.
89648964

8965-
if (!Subtarget->hasV6Ops())
8965+
if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
8966+
(Subtarget->isThumb() && !Subtarget->hasThumb2()))
89668967
return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
89678968

89688969
SDNode *PrevAddc = nullptr;

test/CodeGen/ARM/longMAC.ll

Lines changed: 124 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,33 @@
22
; RUN: llc -mtriple=armv7-eabi %s -o - | FileCheck %s --check-prefix=CHECK-V7-LE
33
; RUN: llc -mtriple=armeb-eabi %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
44
; RUN: llc -mtriple=armebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-BE
5+
; RUN: llc -mtriple=thumbv6-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB
6+
; RUN: llc -mtriple=thumbv6t2-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6-THUMB2
7+
; RUN: llc -mtriple=thumbv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB
8+
; RUN: llc -mtriple=thumbebv7-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7-THUMB-BE
9+
; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V6M-THUMB
10+
; RUN: llc -mtriple=thumbv7m-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7M-THUMB
11+
; RUN: llc -mtriple=thumbv7em-eabi %s -o - | FileCheck %s -check-prefix=CHECK-V7EM-THUMB
512
; Check generated signed and unsigned multiply accumulate long.
613

714
define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
815
;CHECK-LABEL: MACLongTest1:
9-
;CHECK: umlal
16+
;CHECK-V6-THUMB-NOT: umlal
17+
;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
18+
;CHECK-LE: mov r0, [[RDLO]]
19+
;CHECK-LE: mov r1, [[RDHI]]
20+
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
21+
;CHECK-BE: mov r0, [[RDHI]]
22+
;CHECK-BE: mov r1, [[RDLO]]
23+
;CHECK-V6-THUMB2: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
24+
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
25+
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
26+
;CHECK-V7-THUMB: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
27+
;CHECK-V7-THUMB: mov r0, [[RDLO]]
28+
;CHECK-V7-THUMB: mov r1, [[RDHI]]
29+
;CHECK-V7-THUMB-BE: umlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
30+
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
31+
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
1032
%conv = zext i32 %a to i64
1133
%conv1 = zext i32 %b to i64
1234
%mul = mul i64 %conv1, %conv
@@ -16,17 +38,49 @@ define i64 @MACLongTest1(i32 %a, i32 %b, i64 %c) {
1638

1739
define i64 @MACLongTest2(i32 %a, i32 %b, i64 %c) {
1840
;CHECK-LABEL: MACLongTest2:
19-
;CHECK: smlal
41+
;CHECK-LE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
42+
;CHECK-LE: mov r0, [[RDLO]]
43+
;CHECK-LE: mov r1, [[RDHI]]
44+
;CHECK-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
45+
;CHECK-BE: mov r0, [[RDHI]]
46+
;CHECK-BE: mov r1, [[RDLO]]
47+
;CHECK-V6-THUMB2: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
48+
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
49+
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
50+
;CHECK-V7-THUMB: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
51+
;CHECK-V7-THUMB: mov r0, [[RDLO]]
52+
;CHECK-V7-THUMB: mov r1, [[RDHI]]
53+
;CHECK-V7-THUMB-BE: smlal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
54+
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
55+
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
2056
%conv = sext i32 %a to i64
2157
%conv1 = sext i32 %b to i64
2258
%mul = mul nsw i64 %conv1, %conv
2359
%add = add nsw i64 %mul, %c
2460
ret i64 %add
2561
}
2662

63+
; Two things to check here: the @earlyclobber constraint (on <= v5) and the "$Rd = $R" ones.
64+
; + Without @earlyclobber the v7 code is natural. With it, the first two
65+
; registers must be distinct from the third.
66+
; + Without "$Rd = $R", this can be satisfied without a mov before the umlal
67+
; by trying to use 6 different registers in the MachineInstr. The natural
68+
; evolution of this attempt currently leaves only two movs in the final
69+
; function, both after the umlal. With it, *some* move has to happen
70+
; before the umlal.
2771
define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
2872
;CHECK-LABEL: MACLongTest3:
29-
;CHECK: umlal
73+
;CHECK-LE: mov [[RDHI:r[0-9]+]], #0
74+
;CHECK-LE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
75+
;CHECK-LE: mov r0, [[RDLO]]
76+
;CHECK-LE: mov r1, [[RDHI]]
77+
;CHECK-BE: mov [[RDHI:r[0-9]+]], #0
78+
;CHECK-BE: umlal [[RDLO:r[0-9]+]], [[RDHI]], r1, r0
79+
;CHECK-BE: mov r0, [[RDHI]]
80+
;CHECK-BE: mov r1, [[RDLO]]
81+
;CHECK-V6-THUMB2: umlal
82+
;CHECK-V7-THUMB: umlal
83+
;CHECK-V6-THUMB-NOT: umlal
3084
%conv = zext i32 %b to i64
3185
%conv1 = zext i32 %a to i64
3286
%mul = mul i64 %conv, %conv1
@@ -37,7 +91,17 @@ define i64 @MACLongTest3(i32 %a, i32 %b, i32 %c) {
3791

3892
define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
3993
;CHECK-LABEL: MACLongTest4:
40-
;CHECK: smlal
94+
;CHECK-V6-THUMB-NOT: smlal
95+
;CHECK-V6-THUMB2: smlal
96+
;CHECK-V7-THUMB: smlal
97+
;CHECK-LE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
98+
;CHECK-LE: smlal [[RDLO]], [[RDHI]], r1, r0
99+
;CHECK-LE: mov r0, [[RDLO]]
100+
;CHECK-LE: mov r1, [[RDHI]]
101+
;CHECK-BE: asr [[RDHI:r[0-9]+]], [[RDLO:r[0-9]+]], #31
102+
;CHECK-BE: smlal [[RDLO]], [[RDHI]], r1, r0
103+
;CHECK-BE: mov r0, [[RDHI]]
104+
;CHECK-BE: mov r1, [[RDLO]]
41105
%conv = sext i32 %b to i64
42106
%conv1 = sext i32 %a to i64
43107
%mul = mul nsw i64 %conv, %conv1
@@ -46,40 +110,18 @@ define i64 @MACLongTest4(i32 %a, i32 %b, i32 %c) {
46110
ret i64 %add
47111
}
48112

49-
; Two things to check here: the @earlyclobber constraint (on <= v5) and the "$Rd = $R" ones.
50-
; + Without @earlyclobber the v7 code is natural. With it, the first two
51-
; registers must be distinct from the third.
52-
; + Without "$Rd = $R", this can be satisfied without a mov before the umlal
53-
; by trying to use 6 different registers in the MachineInstr. The natural
54-
; evolution of this attempt currently leaves only two movs in the final
55-
; function, both after the umlal. With it, *some* move has to happen
56-
; before the umlal.
57-
define i64 @MACLongTest5(i64 %c, i32 %a, i32 %b) {
58-
; CHECK-V7-LE-LABEL: MACLongTest5:
59-
; CHECK-V7-LE-LABEL: umlal r0, r1, r0, r0
60-
; CHECK-V7-BE-LABEL: MACLongTest5:
61-
; CHECK-V7-BE-LABEL: umlal r1, r0, r1, r1
62-
63-
; CHECK-LABEL: MACLongTest5:
64-
; CHECK-LE: mov [[RDLO:r[0-9]+]], r0
65-
; CHECK-LE: umlal [[RDLO]], r1, r0, r0
66-
; CHECK-LE: mov r0, [[RDLO]]
67-
; CHECK-BE: mov [[RDLO:r[0-9]+]], r1
68-
; CHECK-BE: umlal [[RDLO]], r0, r1, r1
69-
; CHECK-BE: mov r1, [[RDLO]]
70-
71-
%conv.trunc = trunc i64 %c to i32
72-
%conv = zext i32 %conv.trunc to i64
73-
%conv1 = zext i32 %b to i64
74-
%mul = mul i64 %conv, %conv
75-
%add = add i64 %mul, %c
76-
ret i64 %add
77-
}
78-
79113
define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
80114
;CHECK-LABEL: MACLongTest6:
115+
;CHECK-V6-THUMB-NOT: smull
116+
;CHECK-V6-THUMB-NOT: smlal
81117
;CHECK: smull r12, lr, r1, r0
82118
;CHECK: smlal r12, lr, r3, r2
119+
;CHECK-V7: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
120+
;CHECK-V7: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
121+
;CHECK-V7-THUMB: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
122+
;CHECK-V7-THUMB: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
123+
;CHECK-V6-THUMB2: smull [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], r1, r0
124+
;CHECK-V6-THUMB2: smlal [[RDLO]], [[RDHI]], [[Rn:r[0-9]+]], [[Rm:r[0-9]+]]
83125
%conv = sext i32 %a to i64
84126
%conv1 = sext i32 %b to i64
85127
%mul = mul nsw i64 %conv1, %conv
@@ -93,6 +135,9 @@ define i64 @MACLongTest6(i32 %a, i32 %b, i32 %c, i32 %d) {
93135
define i64 @MACLongTest7(i64 %acc, i32 %lhs, i32 %rhs) {
94136
;CHECK-LABEL: MACLongTest7:
95137
;CHECK-NOT: smlal
138+
;CHECK-V6-THUMB2-NOT: smlal
139+
;CHECK-V7-THUMB-NOT: smlal
140+
;CHECK-V6-THUMB-NOT: smlal
96141
%conv = sext i32 %lhs to i64
97142
%conv1 = sext i32 %rhs to i64
98143
%mul = mul nsw i64 %conv1, %conv
@@ -106,6 +151,9 @@ define i64 @MACLongTest7(i64 %acc, i32 %lhs, i32 %rhs) {
106151
define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) {
107152
;CHECK-LABEL: MACLongTest8:
108153
;CHECK-NOT: smlal
154+
;CHECK-V6-THUMB2-NOT: smlal
155+
;CHECK-V7-THUMB-NOT: smlal
156+
;CHECK-V6-THUMB-NOT: smlal
109157
%conv = zext i32 %lhs to i64
110158
%conv1 = zext i32 %rhs to i64
111159
%mul = mul nuw i64 %conv1, %conv
@@ -118,9 +166,28 @@ define i64 @MACLongTest8(i64 %acc, i32 %lhs, i32 %rhs) {
118166

119167
define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
120168
;CHECK-LABEL: MACLongTest9:
121-
;CHECK-V7-LE:umaal
122-
;CHECK-V7-BE:umaal
169+
;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
170+
;CHECK-V7-LE: mov r0, [[RDLO]]
171+
;CHECK-V7-LE: mov r1, [[RDHI]]
172+
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
173+
;CHECK-V7-BE: mov r0, [[RDHI]]
174+
;CHECK-V7-BE: mov r1, [[RDLO]]
175+
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
176+
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
177+
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
178+
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
179+
;CHECK-V7-THUMB: mov r0, [[RDLO]]
180+
;CHECK-V7-THUMB: mov r1, [[RDHI]]
181+
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
182+
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
183+
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
184+
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
185+
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
186+
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
123187
;CHECK-NOT:umaal
188+
;CHECK-V6-THUMB-NOT: umaal
189+
;CHECK-V6M-THUMB-NOT: umaal
190+
;CHECK-V7M-THUMB-NOT: umaal
124191
%conv = zext i32 %lhs to i64
125192
%conv1 = zext i32 %rhs to i64
126193
%mul = mul nuw i64 %conv1, %conv
@@ -133,9 +200,28 @@ define i64 @MACLongTest9(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
133200

134201
define i64 @MACLongTest10(i32 %lhs, i32 %rhs, i32 %lo, i32 %hi) {
135202
;CHECK-LABEL: MACLongTest10:
136-
;CHECK-V7-LE:umaal
137-
;CHECK-V7-BE:umaal
203+
;CHECK-V7-LE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
204+
;CHECK-V7-LE: mov r0, [[RDLO]]
205+
;CHECK-V7-LE: mov r1, [[RDHI]]
206+
;CHECK-V7-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
207+
;CHECK-V7-BE: mov r0, [[RDHI]]
208+
;CHECK-V7-BE: mov r1, [[RDLO]]
209+
;CHECK-V6-THUMB2: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
210+
;CHECK-V6-THUMB2: mov r0, [[RDLO]]
211+
;CHECK-V6-THUMB2: mov r1, [[RDHI]]
212+
;CHECK-V7-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
213+
;CHECK-V7-THUMB: mov r0, [[RDLO]]
214+
;CHECK-V7-THUMB: mov r1, [[RDHI]]
215+
;CHECK-V7-THUMB-BE: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
216+
;CHECK-V7-THUMB-BE: mov r0, [[RDHI]]
217+
;CHECK-V7-THUMB-BE: mov r1, [[RDLO]]
218+
;CHECK-V7EM-THUMB: umaal [[RDLO:r[0-9]+]], [[RDHI:r[0-9]+]], [[LHS:r[0-9]+]], [[RHS:r[0-9]+]]
219+
;CHECK-V7EM-THUMB: mov r0, [[RDLO]]
220+
;CHECK-V7EM-THUMB: mov r1, [[RDHI]]
138221
;CHECK-NOT:umaal
222+
;CHECK-V6-THUMB-NOT:umaal
223+
;CHECK-V6M-THUMB-NOT: umaal
224+
;CHECK-V7M-THUMB-NOT: umaal
139225
%conv = zext i32 %lhs to i64
140226
%conv1 = zext i32 %rhs to i64
141227
%mul = mul nuw i64 %conv1, %conv

0 commit comments

Comments
 (0)