Skip to content

Commit 074e4ae

Browse files
authored
[DAG] foldABSToABD - support abs(*ext(x) - *ext(y)) -> zext(abd*(x, y)) from different extension source types (#71670)
We currently limit the fold to cases where both operands are extended from the same source type. However, we can safely perform the fold using the wider of two mismatching source types (all we really need is for both sources to have extension bits), while ensuring that we don't create additional extensions/truncations.
1 parent 783ac3b commit 074e4ae

File tree

4 files changed

+65
-93
lines changed

4 files changed

+65
-93
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10923,11 +10923,12 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
1092310923

1092410924
// fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
1092510925
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
10926-
// NOTE: Extensions must be equivalent.
10927-
if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
10928-
Op0 = Op0.getOperand(0);
10929-
Op1 = Op1.getOperand(0);
10930-
SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
10926+
EVT MaxVT = VT1.bitsGT(VT2) ? VT1 : VT2;
10927+
if ((VT1 == MaxVT || Op0->hasOneUse()) &&
10928+
(VT2 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10929+
SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10930+
DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10931+
DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
1093110932
ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
1093210933
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
1093310934
}

llvm/test/CodeGen/AArch64/sve-abd.ll

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -248,17 +248,9 @@ define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x
248248
define <vscale x 4 x i32> @uabd_non_matching_extension(<vscale x 4 x i32> %a, <vscale x 4 x i8> %b) #0 {
249249
; CHECK-LABEL: uabd_non_matching_extension:
250250
; CHECK: // %bb.0:
251+
; CHECK-NEXT: ptrue p0.s
251252
; CHECK-NEXT: and z1.s, z1.s, #0xff
252-
; CHECK-NEXT: uunpkhi z2.d, z0.s
253-
; CHECK-NEXT: uunpklo z0.d, z0.s
254-
; CHECK-NEXT: ptrue p0.d
255-
; CHECK-NEXT: uunpkhi z3.d, z1.s
256-
; CHECK-NEXT: uunpklo z1.d, z1.s
257-
; CHECK-NEXT: sub z0.d, z0.d, z1.d
258-
; CHECK-NEXT: sub z1.d, z2.d, z3.d
259-
; CHECK-NEXT: abs z1.d, p0/m, z1.d
260-
; CHECK-NEXT: abs z0.d, p0/m, z0.d
261-
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
253+
; CHECK-NEXT: uabd z0.s, p0/m, z0.s, z1.s
262254
; CHECK-NEXT: ret
263255
%a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
264256
%b.zext = zext <vscale x 4 x i8> %b to <vscale x 4 x i64>

llvm/test/CodeGen/X86/abds.ll

Lines changed: 29 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -50,16 +50,13 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
5050
;
5151
; X64-LABEL: abd_ext_i8_i16:
5252
; X64: # %bb.0:
53-
; X64-NEXT: # kill: def $esi killed $esi def $rsi
54-
; X64-NEXT: # kill: def $edi killed $edi def $rdi
55-
; X64-NEXT: movsbq %dil, %rax
56-
; X64-NEXT: movswq %si, %rcx
57-
; X64-NEXT: movq %rax, %rdx
58-
; X64-NEXT: subq %rcx, %rdx
59-
; X64-NEXT: negq %rdx
60-
; X64-NEXT: subq %rcx, %rax
61-
; X64-NEXT: cmovleq %rdx, %rax
62-
; X64-NEXT: # kill: def $al killed $al killed $rax
53+
; X64-NEXT: movswl %si, %eax
54+
; X64-NEXT: movsbl %dil, %ecx
55+
; X64-NEXT: subl %eax, %ecx
56+
; X64-NEXT: movl %ecx, %eax
57+
; X64-NEXT: negl %eax
58+
; X64-NEXT: cmovsl %ecx, %eax
59+
; X64-NEXT: # kill: def $al killed $al killed $eax
6360
; X64-NEXT: retq
6461
%aext = sext i8 %a to i64
6562
%bext = sext i16 %b to i64
@@ -132,32 +129,25 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
132129
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
133130
; X86-LABEL: abd_ext_i16_i32:
134131
; X86: # %bb.0:
135-
; X86-NEXT: pushl %esi
136132
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
137-
; X86-NEXT: movl %ecx, %edx
138-
; X86-NEXT: sarl $31, %edx
139133
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
140-
; X86-NEXT: movl %eax, %esi
141-
; X86-NEXT: sarl $31, %esi
134+
; X86-NEXT: movl %eax, %edx
135+
; X86-NEXT: subl %ecx, %edx
136+
; X86-NEXT: negl %edx
142137
; X86-NEXT: subl %ecx, %eax
143-
; X86-NEXT: sbbl %edx, %esi
144-
; X86-NEXT: sarl $31, %esi
145-
; X86-NEXT: xorl %esi, %eax
146-
; X86-NEXT: subl %esi, %eax
138+
; X86-NEXT: cmovlel %edx, %eax
147139
; X86-NEXT: # kill: def $ax killed $ax killed $eax
148-
; X86-NEXT: popl %esi
149140
; X86-NEXT: retl
150141
;
151142
; X64-LABEL: abd_ext_i16_i32:
152143
; X64: # %bb.0:
153-
; X64-NEXT: # kill: def $edi killed $edi def $rdi
154-
; X64-NEXT: movswq %di, %rax
155-
; X64-NEXT: movslq %esi, %rcx
156-
; X64-NEXT: movq %rax, %rdx
157-
; X64-NEXT: subq %rcx, %rdx
158-
; X64-NEXT: negq %rdx
159-
; X64-NEXT: subq %rcx, %rax
160-
; X64-NEXT: cmovleq %rdx, %rax
144+
; X64-NEXT: movslq %esi, %rax
145+
; X64-NEXT: movswl %di, %ecx
146+
; X64-NEXT: movslq %ecx, %rcx
147+
; X64-NEXT: subq %rax, %rcx
148+
; X64-NEXT: movq %rcx, %rax
149+
; X64-NEXT: negq %rax
150+
; X64-NEXT: cmovsq %rcx, %rax
161151
; X64-NEXT: # kill: def $ax killed $ax killed $rax
162152
; X64-NEXT: retq
163153
%aext = sext i16 %a to i64
@@ -231,31 +221,24 @@ define i32 @abd_ext_i32(i32 %a, i32 %b) nounwind {
231221
define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
232222
; X86-LABEL: abd_ext_i32_i16:
233223
; X86: # %bb.0:
234-
; X86-NEXT: pushl %esi
235224
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
236-
; X86-NEXT: movl %ecx, %edx
237-
; X86-NEXT: sarl $31, %edx
238225
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
239-
; X86-NEXT: movl %eax, %esi
240-
; X86-NEXT: sarl $31, %esi
226+
; X86-NEXT: movl %eax, %edx
227+
; X86-NEXT: subl %ecx, %edx
228+
; X86-NEXT: negl %edx
241229
; X86-NEXT: subl %ecx, %eax
242-
; X86-NEXT: sbbl %edx, %esi
243-
; X86-NEXT: sarl $31, %esi
244-
; X86-NEXT: xorl %esi, %eax
245-
; X86-NEXT: subl %esi, %eax
246-
; X86-NEXT: popl %esi
230+
; X86-NEXT: cmovlel %edx, %eax
247231
; X86-NEXT: retl
248232
;
249233
; X64-LABEL: abd_ext_i32_i16:
250234
; X64: # %bb.0:
251-
; X64-NEXT: # kill: def $esi killed $esi def $rsi
252-
; X64-NEXT: movslq %edi, %rax
253-
; X64-NEXT: movswq %si, %rcx
254-
; X64-NEXT: movq %rax, %rdx
255-
; X64-NEXT: subq %rcx, %rdx
256-
; X64-NEXT: negq %rdx
257-
; X64-NEXT: subq %rcx, %rax
258-
; X64-NEXT: cmovleq %rdx, %rax
235+
; X64-NEXT: movslq %edi, %rcx
236+
; X64-NEXT: movswl %si, %eax
237+
; X64-NEXT: cltq
238+
; X64-NEXT: subq %rax, %rcx
239+
; X64-NEXT: movq %rcx, %rax
240+
; X64-NEXT: negq %rax
241+
; X64-NEXT: cmovsq %rcx, %rax
259242
; X64-NEXT: # kill: def $eax killed $eax killed $rax
260243
; X64-NEXT: retq
261244
%aext = sext i32 %a to i64

llvm/test/CodeGen/X86/abdu.ll

Lines changed: 28 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -50,14 +50,13 @@ define i8 @abd_ext_i8_i16(i8 %a, i16 %b) nounwind {
5050
;
5151
; X64-LABEL: abd_ext_i8_i16:
5252
; X64: # %bb.0:
53-
; X64-NEXT: movzbl %dil, %eax
54-
; X64-NEXT: movzwl %si, %ecx
55-
; X64-NEXT: movq %rax, %rdx
56-
; X64-NEXT: subq %rcx, %rdx
57-
; X64-NEXT: negq %rdx
58-
; X64-NEXT: subq %rcx, %rax
59-
; X64-NEXT: cmovbeq %rdx, %rax
60-
; X64-NEXT: # kill: def $al killed $al killed $rax
53+
; X64-NEXT: movzwl %si, %eax
54+
; X64-NEXT: movzbl %dil, %ecx
55+
; X64-NEXT: subl %eax, %ecx
56+
; X64-NEXT: movl %ecx, %eax
57+
; X64-NEXT: negl %eax
58+
; X64-NEXT: cmovsl %ecx, %eax
59+
; X64-NEXT: # kill: def $al killed $al killed $eax
6160
; X64-NEXT: retq
6261
%aext = zext i8 %a to i64
6362
%bext = zext i16 %b to i64
@@ -130,25 +129,24 @@ define i16 @abd_ext_i16(i16 %a, i16 %b) nounwind {
130129
define i16 @abd_ext_i16_i32(i16 %a, i32 %b) nounwind {
131130
; X86-LABEL: abd_ext_i16_i32:
132131
; X86: # %bb.0:
132+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
133133
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
134-
; X86-NEXT: xorl %ecx, %ecx
135-
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
136-
; X86-NEXT: sbbl %ecx, %ecx
137-
; X86-NEXT: sarl $31, %ecx
138-
; X86-NEXT: xorl %ecx, %eax
134+
; X86-NEXT: movl %eax, %edx
135+
; X86-NEXT: subl %ecx, %edx
136+
; X86-NEXT: negl %edx
139137
; X86-NEXT: subl %ecx, %eax
138+
; X86-NEXT: cmovbel %edx, %eax
140139
; X86-NEXT: # kill: def $ax killed $ax killed $eax
141140
; X86-NEXT: retl
142141
;
143142
; X64-LABEL: abd_ext_i16_i32:
144143
; X64: # %bb.0:
145-
; X64-NEXT: movzwl %di, %eax
146-
; X64-NEXT: movl %esi, %ecx
147-
; X64-NEXT: movq %rax, %rdx
148-
; X64-NEXT: subq %rcx, %rdx
149-
; X64-NEXT: negq %rdx
150-
; X64-NEXT: subq %rcx, %rax
151-
; X64-NEXT: cmovbeq %rdx, %rax
144+
; X64-NEXT: movl %esi, %eax
145+
; X64-NEXT: movzwl %di, %ecx
146+
; X64-NEXT: subq %rax, %rcx
147+
; X64-NEXT: movq %rcx, %rax
148+
; X64-NEXT: negq %rax
149+
; X64-NEXT: cmovsq %rcx, %rax
152150
; X64-NEXT: # kill: def $ax killed $ax killed $rax
153151
; X64-NEXT: retq
154152
%aext = zext i16 %a to i64
@@ -224,23 +222,21 @@ define i32 @abd_ext_i32_i16(i32 %a, i16 %b) nounwind {
224222
; X86: # %bb.0:
225223
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
226224
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
227-
; X86-NEXT: xorl %edx, %edx
225+
; X86-NEXT: movl %eax, %edx
226+
; X86-NEXT: subl %ecx, %edx
227+
; X86-NEXT: negl %edx
228228
; X86-NEXT: subl %ecx, %eax
229-
; X86-NEXT: sbbl %edx, %edx
230-
; X86-NEXT: sarl $31, %edx
231-
; X86-NEXT: xorl %edx, %eax
232-
; X86-NEXT: subl %edx, %eax
229+
; X86-NEXT: cmovbel %edx, %eax
233230
; X86-NEXT: retl
234231
;
235232
; X64-LABEL: abd_ext_i32_i16:
236233
; X64: # %bb.0:
237-
; X64-NEXT: movl %edi, %eax
238-
; X64-NEXT: movzwl %si, %ecx
239-
; X64-NEXT: movq %rax, %rdx
240-
; X64-NEXT: subq %rcx, %rdx
241-
; X64-NEXT: negq %rdx
242-
; X64-NEXT: subq %rcx, %rax
243-
; X64-NEXT: cmovbeq %rdx, %rax
234+
; X64-NEXT: movl %edi, %ecx
235+
; X64-NEXT: movzwl %si, %eax
236+
; X64-NEXT: subq %rax, %rcx
237+
; X64-NEXT: movq %rcx, %rax
238+
; X64-NEXT: negq %rax
239+
; X64-NEXT: cmovsq %rcx, %rax
244240
; X64-NEXT: # kill: def $eax killed $eax killed $rax
245241
; X64-NEXT: retq
246242
%aext = zext i32 %a to i64

0 commit comments

Comments
 (0)