Skip to content

Commit 1085b70

Browse files
committed
[DAG] Don't fold (zext (bitop (load x), cst)) -> (bitop (zextload x), (zext cst)) if the zext is free
This prevents an infinite loop when another combine has been trying to narrow the bitop to a more preferable type.
1 parent bde5717 commit 1085b70

File tree

2 files changed

+26
-28
lines changed

2 files changed

+26
-28
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13717,8 +13717,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1371713717
// fold (zext (and/or/xor (load x), cst)) ->
1371813718
// (and/or/xor (zextload x), (zext cst))
1371913719
// Unless (and (load x) cst) will match as a zextload already and has
13720-
// additional users.
13721-
if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13720+
// additional users, or the zext is already free.
13721+
if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
1372213722
isa<LoadSDNode>(N0.getOperand(0)) &&
1372313723
N0.getOperand(1).getOpcode() == ISD::Constant &&
1372413724
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {

llvm/test/CodeGen/X86/wide-scalar-shift-by-byte-multiple-legalization.ll

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,25 +2367,24 @@ define void @shl_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
23672367
define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
23682368
; X64-SSE2-LABEL: ashr_64bytes:
23692369
; X64-SSE2: # %bb.0:
2370-
; X64-SSE2-NEXT: pushq %r14
23712370
; X64-SSE2-NEXT: pushq %rbx
2372-
; X64-SSE2-NEXT: movq (%rdi), %rcx
2373-
; X64-SSE2-NEXT: movq 8(%rdi), %r8
2374-
; X64-SSE2-NEXT: movq 16(%rdi), %r9
2375-
; X64-SSE2-NEXT: movq 24(%rdi), %r10
2376-
; X64-SSE2-NEXT: movq 32(%rdi), %r11
2377-
; X64-SSE2-NEXT: movq 40(%rdi), %rbx
2378-
; X64-SSE2-NEXT: movq 48(%rdi), %r14
2371+
; X64-SSE2-NEXT: movq (%rdi), %rax
2372+
; X64-SSE2-NEXT: movq 8(%rdi), %rcx
2373+
; X64-SSE2-NEXT: movq 16(%rdi), %r8
2374+
; X64-SSE2-NEXT: movq 24(%rdi), %r9
2375+
; X64-SSE2-NEXT: movq 32(%rdi), %r10
2376+
; X64-SSE2-NEXT: movq 40(%rdi), %r11
2377+
; X64-SSE2-NEXT: movq 48(%rdi), %rbx
23792378
; X64-SSE2-NEXT: movq 56(%rdi), %rdi
2380-
; X64-SSE2-NEXT: movl (%rsi), %eax
2379+
; X64-SSE2-NEXT: movl (%rsi), %esi
23812380
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
2382-
; X64-SSE2-NEXT: movq %r14, -{{[0-9]+}}(%rsp)
23832381
; X64-SSE2-NEXT: movq %rbx, -{{[0-9]+}}(%rsp)
23842382
; X64-SSE2-NEXT: movq %r11, -{{[0-9]+}}(%rsp)
23852383
; X64-SSE2-NEXT: movq %r10, -{{[0-9]+}}(%rsp)
23862384
; X64-SSE2-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
23872385
; X64-SSE2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
23882386
; X64-SSE2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
2387+
; X64-SSE2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
23892388
; X64-SSE2-NEXT: sarq $63, %rdi
23902389
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
23912390
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
@@ -2395,25 +2394,24 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
23952394
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
23962395
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
23972396
; X64-SSE2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
2398-
; X64-SSE2-NEXT: andl $63, %eax
2399-
; X64-SSE2-NEXT: movq -128(%rsp,%rax), %rcx
2400-
; X64-SSE2-NEXT: movq -120(%rsp,%rax), %rsi
2401-
; X64-SSE2-NEXT: movq -104(%rsp,%rax), %rdi
2402-
; X64-SSE2-NEXT: movq -112(%rsp,%rax), %r8
2403-
; X64-SSE2-NEXT: movq -88(%rsp,%rax), %r9
2404-
; X64-SSE2-NEXT: movq -96(%rsp,%rax), %r10
2405-
; X64-SSE2-NEXT: movq -72(%rsp,%rax), %r11
2406-
; X64-SSE2-NEXT: movq -80(%rsp,%rax), %rax
2407-
; X64-SSE2-NEXT: movq %rax, 48(%rdx)
2397+
; X64-SSE2-NEXT: andl $63, %esi
2398+
; X64-SSE2-NEXT: movq -128(%rsp,%rsi), %rax
2399+
; X64-SSE2-NEXT: movq -120(%rsp,%rsi), %rcx
2400+
; X64-SSE2-NEXT: movq -104(%rsp,%rsi), %rdi
2401+
; X64-SSE2-NEXT: movq -112(%rsp,%rsi), %r8
2402+
; X64-SSE2-NEXT: movq -88(%rsp,%rsi), %r9
2403+
; X64-SSE2-NEXT: movq -96(%rsp,%rsi), %r10
2404+
; X64-SSE2-NEXT: movq -72(%rsp,%rsi), %r11
2405+
; X64-SSE2-NEXT: movq -80(%rsp,%rsi), %rsi
2406+
; X64-SSE2-NEXT: movq %rsi, 48(%rdx)
24082407
; X64-SSE2-NEXT: movq %r11, 56(%rdx)
24092408
; X64-SSE2-NEXT: movq %r10, 32(%rdx)
24102409
; X64-SSE2-NEXT: movq %r9, 40(%rdx)
24112410
; X64-SSE2-NEXT: movq %r8, 16(%rdx)
24122411
; X64-SSE2-NEXT: movq %rdi, 24(%rdx)
2413-
; X64-SSE2-NEXT: movq %rcx, (%rdx)
2414-
; X64-SSE2-NEXT: movq %rsi, 8(%rdx)
2412+
; X64-SSE2-NEXT: movq %rax, (%rdx)
2413+
; X64-SSE2-NEXT: movq %rcx, 8(%rdx)
24152414
; X64-SSE2-NEXT: popq %rbx
2416-
; X64-SSE2-NEXT: popq %r14
24172415
; X64-SSE2-NEXT: retq
24182416
;
24192417
; X64-SSE42-LABEL: ashr_64bytes:
@@ -2443,9 +2441,9 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
24432441
; X64-SSE42-NEXT: movups -112(%rsp,%rsi), %xmm1
24442442
; X64-SSE42-NEXT: movups -96(%rsp,%rsi), %xmm2
24452443
; X64-SSE42-NEXT: movups -80(%rsp,%rsi), %xmm3
2444+
; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
24462445
; X64-SSE42-NEXT: movups %xmm1, 16(%rdx)
24472446
; X64-SSE42-NEXT: movups %xmm2, 32(%rdx)
2448-
; X64-SSE42-NEXT: movups %xmm3, 48(%rdx)
24492447
; X64-SSE42-NEXT: movups %xmm0, (%rdx)
24502448
; X64-SSE42-NEXT: retq
24512449
;
@@ -2474,9 +2472,9 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
24742472
; X64-AVX-NEXT: vmovups -112(%rsp,%rsi), %xmm1
24752473
; X64-AVX-NEXT: vmovups -96(%rsp,%rsi), %xmm2
24762474
; X64-AVX-NEXT: vmovups -80(%rsp,%rsi), %xmm3
2475+
; X64-AVX-NEXT: vmovups %xmm3, 48(%rdx)
24772476
; X64-AVX-NEXT: vmovups %xmm1, 16(%rdx)
24782477
; X64-AVX-NEXT: vmovups %xmm2, 32(%rdx)
2479-
; X64-AVX-NEXT: vmovups %xmm3, 48(%rdx)
24802478
; X64-AVX-NEXT: vmovups %xmm0, (%rdx)
24812479
; X64-AVX-NEXT: vzeroupper
24822480
; X64-AVX-NEXT: retq
@@ -2772,5 +2770,5 @@ define void @ashr_64bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
27722770
; FALLBACK7: {{.*}}
27732771
; FALLBACK8: {{.*}}
27742772
; FALLBACK9: {{.*}}
2775-
; X86: {{.*}}
27762773
; X64: {{.*}}
2774+
; X86: {{.*}}

0 commit comments

Comments
 (0)