[DAGCombiner] Don't drop atomic property of original load. #75626

Merged · 2 commits · Dec 19, 2023
3 changes: 1 addition & 2 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15165,8 +15165,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
            *LN0->getMemOperand())) {
       SDValue Load =
           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
-                      LN0->getPointerInfo(), LN0->getAlign(),
-                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
+                      LN0->getMemOperand());
       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
       return Load;
     }
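For context on why this fixes the bug: `MachineMemOperand::Flags` (volatile, invariant, and so on) does not encode atomic orderings; those live on the `MachineMemOperand` itself and are queried via `getSuccessOrdering()`. Rebuilding the operand from `getPointerInfo()`, `getAlign()`, `getFlags()`, and `getAAInfo()` therefore produced a non-atomic memory operand for the combined load, while forwarding `LN0->getMemOperand()` keeps the ordering intact. Below is a minimal standalone C++ sketch of the pitfall; the `MemOp`, `rebuildFromParts`, and `forwardOriginal` names are illustrative, not LLVM's API.

```cpp
#include <cassert>
#include <cstdio>

// Toy stand-in for MachineMemOperand: the atomic ordering is a separate
// field on the operand, not one of its flag bits.
enum class Ordering { NotAtomic, Monotonic, Acquire, SeqCst };

struct MemOp {
  unsigned Flags;  // volatile / invariant / ... (no atomic bit in here)
  Ordering Ord;    // atomic success ordering
  bool isAtomic() const { return Ord != Ordering::NotAtomic; }
};

// Old code path: a fresh operand is rebuilt from the pieces the caller
// extracted, so the flags survive but the ordering is silently dropped.
MemOp rebuildFromParts(unsigned Flags) {
  return MemOp{Flags, Ordering::NotAtomic};
}

// Fixed code path: forward the original operand wholesale.
MemOp forwardOriginal(const MemOp &Orig) { return Orig; }

int main() {
  MemOp AtomicLoad{/*Flags=*/0x1, Ordering::SeqCst};

  MemOp Rebuilt = rebuildFromParts(AtomicLoad.Flags);
  MemOp Forwarded = forwardOriginal(AtomicLoad);

  std::printf("rebuilt isAtomic=%d, forwarded isAtomic=%d\n",
              Rebuilt.isAtomic(), Forwarded.isAtomic());
  assert(!Rebuilt.isAtomic() && Forwarded.isAtomic());
  return 0;
}
```

The regenerated X86 CHECK lines below appear to be benign churn from the now-preserved memory operand (instructions reordered, registers renamed) rather than functional differences.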
@@ -3647,10 +3647,10 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
 ; AVX-NEXT: vmovdqa (%rdi), %xmm2
 ; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
 ; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
-; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
 ; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
-; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
+; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
 ; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
+; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
 ; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
 ; AVX-NEXT: vmovdqa %xmm1, (%rdx)
 ; AVX-NEXT: vzeroupper
@@ -3833,10 +3833,10 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
 ; AVX-NEXT: vmovdqa (%rdi), %xmm2
 ; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
 ; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
-; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
 ; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
-; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
+; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
 ; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
+; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
 ; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
 ; AVX-NEXT: vmovdqa %xmm1, (%rdx)
 ; AVX-NEXT: vzeroupper
160 changes: 81 additions & 79 deletions llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll
@@ -501,136 +501,138 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
; SSE-LABEL: load_i16_stride2_vf64:
; SSE: # %bb.0:
; SSE-NEXT: subq $40, %rsp
; SSE-NEXT: movdqa 96(%rdi), %xmm13
; SSE-NEXT: movdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 112(%rdi), %xmm3
; SSE-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 128(%rdi), %xmm11
; SSE-NEXT: movdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 144(%rdi), %xmm2
; SSE-NEXT: movdqa 160(%rdi), %xmm14
; SSE-NEXT: movdqa %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 176(%rdi), %xmm2
; SSE-NEXT: movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 160(%rdi), %xmm10
; SSE-NEXT: movdqa %xmm10, (%rsp) # 16-byte Spill
; SSE-NEXT: movdqa 176(%rdi), %xmm4
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa (%rdi), %xmm9
; SSE-NEXT: movdqa %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 16(%rdi), %xmm1
; SSE-NEXT: movdqa 64(%rdi), %xmm11
; SSE-NEXT: movdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 80(%rdi), %xmm1
; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 32(%rdi), %xmm12
; SSE-NEXT: movdqa %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 48(%rdi), %xmm14
; SSE-NEXT: movdqa %xmm14, %xmm0
; SSE-NEXT: movdqa 96(%rdi), %xmm9
; SSE-NEXT: movdqa %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 112(%rdi), %xmm4
; SSE-NEXT: movdqa %xmm4, (%rsp) # 16-byte Spill
; SSE-NEXT: movdqa (%rdi), %xmm10
; SSE-NEXT: movdqa %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 16(%rdi), %xmm7
; SSE-NEXT: movdqa %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 32(%rdi), %xmm13
; SSE-NEXT: movdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa 48(%rdi), %xmm0
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm12
; SSE-NEXT: psrad $16, %xmm12
; SSE-NEXT: packssdw %xmm0, %xmm12
; SSE-NEXT: movdqa %xmm4, %xmm0
; SSE-NEXT: pslld $16, %xmm13
; SSE-NEXT: psrad $16, %xmm13
; SSE-NEXT: packssdw %xmm0, %xmm13
; SSE-NEXT: movdqa %xmm7, %xmm0
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm10
; SSE-NEXT: psrad $16, %xmm10
; SSE-NEXT: packssdw %xmm0, %xmm10
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm4, %xmm0
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm9
; SSE-NEXT: psrad $16, %xmm9
; SSE-NEXT: packssdw %xmm0, %xmm9
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm11
; SSE-NEXT: psrad $16, %xmm11
; SSE-NEXT: packssdw %xmm0, %xmm11
; SSE-NEXT: movdqa %xmm3, %xmm0
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: pslld $16, %xmm13
; SSE-NEXT: psrad $16, %xmm13
; SSE-NEXT: packssdw %xmm0, %xmm13
; SSE-NEXT: movdqa 240(%rdi), %xmm0
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: pslld $16, %xmm14
; SSE-NEXT: psrad $16, %xmm14
; SSE-NEXT: packssdw %xmm0, %xmm14
; SSE-NEXT: movdqa 144(%rdi), %xmm7
; SSE-NEXT: movdqa %xmm7, %xmm0
; SSE-NEXT: pslld $16, %xmm0
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: movdqa 224(%rdi), %xmm7
; SSE-NEXT: movdqa %xmm7, %xmm15
; SSE-NEXT: movdqa 128(%rdi), %xmm8
; SSE-NEXT: movdqa %xmm8, %xmm15
; SSE-NEXT: pslld $16, %xmm15
; SSE-NEXT: psrad $16, %xmm15
; SSE-NEXT: packssdw %xmm0, %xmm15
; SSE-NEXT: movdqa 80(%rdi), %xmm3
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: movdqa 240(%rdi), %xmm12
; SSE-NEXT: movdqa %xmm12, %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: movdqa 64(%rdi), %xmm5
; SSE-NEXT: movdqa %xmm5, %xmm4
; SSE-NEXT: movdqa 224(%rdi), %xmm5
; SSE-NEXT: movdqa %xmm5, %xmm3
; SSE-NEXT: pslld $16, %xmm3
; SSE-NEXT: psrad $16, %xmm3
; SSE-NEXT: packssdw %xmm1, %xmm3
; SSE-NEXT: movdqa 208(%rdi), %xmm6
; SSE-NEXT: movdqa %xmm6, %xmm4
; SSE-NEXT: pslld $16, %xmm4
; SSE-NEXT: psrad $16, %xmm4
; SSE-NEXT: packssdw %xmm1, %xmm4
; SSE-NEXT: movdqa 208(%rdi), %xmm8
; SSE-NEXT: movdqa %xmm8, %xmm6
; SSE-NEXT: pslld $16, %xmm6
; SSE-NEXT: psrad $16, %xmm6
; SSE-NEXT: movdqa 192(%rdi), %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: pslld $16, %xmm1
; SSE-NEXT: psrad $16, %xmm1
; SSE-NEXT: packssdw %xmm6, %xmm1
; SSE-NEXT: psrad $16, %xmm14
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: packssdw %xmm14, %xmm0
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: packssdw %xmm4, %xmm1
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm6
; SSE-NEXT: packssdw %xmm0, %xmm6
; SSE-NEXT: movdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm4
; SSE-NEXT: packssdw %xmm0, %xmm4
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm14
; SSE-NEXT: packssdw %xmm0, %xmm14
; SSE-NEXT: psrad $16, %xmm3
; SSE-NEXT: psrad $16, %xmm5
; SSE-NEXT: packssdw %xmm3, %xmm5
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm4
; SSE-NEXT: packssdw %xmm0, %xmm4
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: movdqa (%rsp), %xmm6 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm6
; SSE-NEXT: packssdw %xmm0, %xmm6
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm4
; SSE-NEXT: packssdw %xmm0, %xmm4
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm3
; SSE-NEXT: packssdw %xmm0, %xmm3
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm4
; SSE-NEXT: packssdw %xmm0, %xmm4
; SSE-NEXT: psrad $16, %xmm7
; SSE-NEXT: psrad $16, %xmm8
; SSE-NEXT: packssdw %xmm7, %xmm8
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm0
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; SSE-NEXT: psrad $16, %xmm7
; SSE-NEXT: packssdw %xmm0, %xmm7
; SSE-NEXT: psrad $16, %xmm8
; SSE-NEXT: psrad $16, %xmm6
; SSE-NEXT: psrad $16, %xmm2
; SSE-NEXT: packssdw %xmm8, %xmm2
; SSE-NEXT: packssdw %xmm6, %xmm2
; SSE-NEXT: psrad $16, %xmm12
; SSE-NEXT: psrad $16, %xmm5
; SSE-NEXT: packssdw %xmm12, %xmm5
; SSE-NEXT: movdqa %xmm1, 96(%rsi)
; SSE-NEXT: movdqa %xmm4, 32(%rsi)
; SSE-NEXT: movdqa %xmm15, 112(%rsi)
; SSE-NEXT: movdqa %xmm13, 48(%rsi)
; SSE-NEXT: movdqa %xmm11, 64(%rsi)
; SSE-NEXT: movdqa %xmm9, (%rsi)
; SSE-NEXT: movdqa %xmm10, 80(%rsi)
; SSE-NEXT: movdqa %xmm12, 16(%rsi)
; SSE-NEXT: movdqa %xmm3, 112(%rsi)
; SSE-NEXT: movdqa %xmm15, 64(%rsi)
; SSE-NEXT: movdqa %xmm14, 80(%rsi)
; SSE-NEXT: movdqa %xmm11, 32(%rsi)
; SSE-NEXT: movdqa %xmm9, 48(%rsi)
; SSE-NEXT: movdqa %xmm10, (%rsi)
; SSE-NEXT: movdqa %xmm13, 16(%rsi)
; SSE-NEXT: movdqa %xmm5, 112(%rdx)
; SSE-NEXT: movdqa %xmm2, 96(%rdx)
; SSE-NEXT: movdqa %xmm7, 112(%rdx)
; SSE-NEXT: movdqa %xmm3, 64(%rdx)
; SSE-NEXT: movdqa %xmm6, 80(%rdx)
; SSE-NEXT: movdqa %xmm5, 32(%rdx)
; SSE-NEXT: movdqa %xmm14, 48(%rdx)
; SSE-NEXT: movdqa %xmm7, 80(%rdx)
; SSE-NEXT: movdqa %xmm8, 64(%rdx)
; SSE-NEXT: movdqa %xmm4, 48(%rdx)
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: movaps %xmm0, (%rdx)
; SSE-NEXT: movaps %xmm0, 32(%rdx)
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: movaps %xmm0, 16(%rdx)
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: movaps %xmm0, (%rdx)
; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: retq
;