Skip to content

Commit 1f6f2f8

Browse files
committed
[DAG] Retain original alignment in bitcast(load(x)) -> load(x) fold
Pulled out of llvm#75626 to allow it to focus on atomic loads.
1 parent f412b78 commit 1f6f2f8

8 files changed

+260
-258
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15165,7 +15165,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
1516515165
*LN0->getMemOperand())) {
1516615166
SDValue Load =
1516715167
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15168-
LN0->getPointerInfo(), LN0->getAlign(),
15168+
LN0->getPointerInfo(), LN0->getOriginalAlign(),
1516915169
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
1517015170
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
1517115171
return Load;

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3647,10 +3647,10 @@ define void @vec384_i32_widen_to_i128_factor4_broadcast_to_v3i128_factor3(ptr %i
36473647
; AVX-NEXT: vmovdqa (%rdi), %xmm2
36483648
; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
36493649
; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
3650-
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
36513650
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
3652-
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3651+
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
36533652
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3653+
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
36543654
; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
36553655
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
36563656
; AVX-NEXT: vzeroupper
@@ -3833,10 +3833,10 @@ define void @vec384_i64_widen_to_i128_factor2_broadcast_to_v3i128_factor3(ptr %i
38333833
; AVX-NEXT: vmovdqa (%rdi), %xmm2
38343834
; AVX-NEXT: vmovdqa 16(%rdi), %xmm3
38353835
; AVX-NEXT: vpaddb 48(%rsi), %xmm3, %xmm3
3836-
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
38373836
; AVX-NEXT: vpaddb 16(%rsi), %xmm0, %xmm0
3838-
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
3837+
; AVX-NEXT: vpaddb 32(%rsi), %xmm2, %xmm2
38393838
; AVX-NEXT: vmovdqa %xmm2, 32(%rdx)
3839+
; AVX-NEXT: vmovdqa %xmm0, 16(%rdx)
38403840
; AVX-NEXT: vmovdqa %xmm3, 48(%rdx)
38413841
; AVX-NEXT: vmovdqa %xmm1, (%rdx)
38423842
; AVX-NEXT: vzeroupper

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll

Lines changed: 81 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -501,136 +501,138 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
501501
; SSE-LABEL: load_i16_stride2_vf64:
502502
; SSE: # %bb.0:
503503
; SSE-NEXT: subq $40, %rsp
504-
; SSE-NEXT: movdqa 96(%rdi), %xmm13
505-
; SSE-NEXT: movdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
506-
; SSE-NEXT: movdqa 112(%rdi), %xmm3
507-
; SSE-NEXT: movdqa %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
508-
; SSE-NEXT: movdqa 128(%rdi), %xmm11
509-
; SSE-NEXT: movdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
510-
; SSE-NEXT: movdqa 144(%rdi), %xmm2
504+
; SSE-NEXT: movdqa 160(%rdi), %xmm14
505+
; SSE-NEXT: movdqa %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
506+
; SSE-NEXT: movdqa 176(%rdi), %xmm2
511507
; SSE-NEXT: movdqa %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
512-
; SSE-NEXT: movdqa 160(%rdi), %xmm10
513-
; SSE-NEXT: movdqa %xmm10, (%rsp) # 16-byte Spill
514-
; SSE-NEXT: movdqa 176(%rdi), %xmm4
515-
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
516-
; SSE-NEXT: movdqa (%rdi), %xmm9
517-
; SSE-NEXT: movdqa %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
518-
; SSE-NEXT: movdqa 16(%rdi), %xmm1
508+
; SSE-NEXT: movdqa 64(%rdi), %xmm11
509+
; SSE-NEXT: movdqa %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
510+
; SSE-NEXT: movdqa 80(%rdi), %xmm1
519511
; SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
520-
; SSE-NEXT: movdqa 32(%rdi), %xmm12
521-
; SSE-NEXT: movdqa %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
522-
; SSE-NEXT: movdqa 48(%rdi), %xmm14
523-
; SSE-NEXT: movdqa %xmm14, %xmm0
512+
; SSE-NEXT: movdqa 96(%rdi), %xmm9
513+
; SSE-NEXT: movdqa %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
514+
; SSE-NEXT: movdqa 112(%rdi), %xmm4
515+
; SSE-NEXT: movdqa %xmm4, (%rsp) # 16-byte Spill
516+
; SSE-NEXT: movdqa (%rdi), %xmm10
517+
; SSE-NEXT: movdqa %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
518+
; SSE-NEXT: movdqa 16(%rdi), %xmm7
519+
; SSE-NEXT: movdqa %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
520+
; SSE-NEXT: movdqa 32(%rdi), %xmm13
521+
; SSE-NEXT: movdqa %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
522+
; SSE-NEXT: movdqa 48(%rdi), %xmm0
523+
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
524524
; SSE-NEXT: pslld $16, %xmm0
525525
; SSE-NEXT: psrad $16, %xmm0
526-
; SSE-NEXT: pslld $16, %xmm12
527-
; SSE-NEXT: psrad $16, %xmm12
528-
; SSE-NEXT: packssdw %xmm0, %xmm12
529-
; SSE-NEXT: movdqa %xmm4, %xmm0
526+
; SSE-NEXT: pslld $16, %xmm13
527+
; SSE-NEXT: psrad $16, %xmm13
528+
; SSE-NEXT: packssdw %xmm0, %xmm13
529+
; SSE-NEXT: movdqa %xmm7, %xmm0
530530
; SSE-NEXT: pslld $16, %xmm0
531531
; SSE-NEXT: psrad $16, %xmm0
532532
; SSE-NEXT: pslld $16, %xmm10
533533
; SSE-NEXT: psrad $16, %xmm10
534534
; SSE-NEXT: packssdw %xmm0, %xmm10
535-
; SSE-NEXT: movdqa %xmm1, %xmm0
535+
; SSE-NEXT: movdqa %xmm4, %xmm0
536536
; SSE-NEXT: pslld $16, %xmm0
537537
; SSE-NEXT: psrad $16, %xmm0
538538
; SSE-NEXT: pslld $16, %xmm9
539539
; SSE-NEXT: psrad $16, %xmm9
540540
; SSE-NEXT: packssdw %xmm0, %xmm9
541-
; SSE-NEXT: movdqa %xmm2, %xmm0
541+
; SSE-NEXT: movdqa %xmm1, %xmm0
542542
; SSE-NEXT: pslld $16, %xmm0
543543
; SSE-NEXT: psrad $16, %xmm0
544544
; SSE-NEXT: pslld $16, %xmm11
545545
; SSE-NEXT: psrad $16, %xmm11
546546
; SSE-NEXT: packssdw %xmm0, %xmm11
547-
; SSE-NEXT: movdqa %xmm3, %xmm0
547+
; SSE-NEXT: movdqa %xmm2, %xmm0
548548
; SSE-NEXT: pslld $16, %xmm0
549549
; SSE-NEXT: psrad $16, %xmm0
550-
; SSE-NEXT: pslld $16, %xmm13
551-
; SSE-NEXT: psrad $16, %xmm13
552-
; SSE-NEXT: packssdw %xmm0, %xmm13
553-
; SSE-NEXT: movdqa 240(%rdi), %xmm0
554-
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
550+
; SSE-NEXT: pslld $16, %xmm14
551+
; SSE-NEXT: psrad $16, %xmm14
552+
; SSE-NEXT: packssdw %xmm0, %xmm14
553+
; SSE-NEXT: movdqa 144(%rdi), %xmm7
554+
; SSE-NEXT: movdqa %xmm7, %xmm0
555555
; SSE-NEXT: pslld $16, %xmm0
556556
; SSE-NEXT: psrad $16, %xmm0
557-
; SSE-NEXT: movdqa 224(%rdi), %xmm7
558-
; SSE-NEXT: movdqa %xmm7, %xmm15
557+
; SSE-NEXT: movdqa 128(%rdi), %xmm8
558+
; SSE-NEXT: movdqa %xmm8, %xmm15
559559
; SSE-NEXT: pslld $16, %xmm15
560560
; SSE-NEXT: psrad $16, %xmm15
561561
; SSE-NEXT: packssdw %xmm0, %xmm15
562-
; SSE-NEXT: movdqa 80(%rdi), %xmm3
563-
; SSE-NEXT: movdqa %xmm3, %xmm1
562+
; SSE-NEXT: movdqa 240(%rdi), %xmm12
563+
; SSE-NEXT: movdqa %xmm12, %xmm1
564564
; SSE-NEXT: pslld $16, %xmm1
565565
; SSE-NEXT: psrad $16, %xmm1
566-
; SSE-NEXT: movdqa 64(%rdi), %xmm5
567-
; SSE-NEXT: movdqa %xmm5, %xmm4
566+
; SSE-NEXT: movdqa 224(%rdi), %xmm5
567+
; SSE-NEXT: movdqa %xmm5, %xmm3
568+
; SSE-NEXT: pslld $16, %xmm3
569+
; SSE-NEXT: psrad $16, %xmm3
570+
; SSE-NEXT: packssdw %xmm1, %xmm3
571+
; SSE-NEXT: movdqa 208(%rdi), %xmm6
572+
; SSE-NEXT: movdqa %xmm6, %xmm4
568573
; SSE-NEXT: pslld $16, %xmm4
569574
; SSE-NEXT: psrad $16, %xmm4
570-
; SSE-NEXT: packssdw %xmm1, %xmm4
571-
; SSE-NEXT: movdqa 208(%rdi), %xmm8
572-
; SSE-NEXT: movdqa %xmm8, %xmm6
573-
; SSE-NEXT: pslld $16, %xmm6
574-
; SSE-NEXT: psrad $16, %xmm6
575575
; SSE-NEXT: movdqa 192(%rdi), %xmm2
576576
; SSE-NEXT: movdqa %xmm2, %xmm1
577577
; SSE-NEXT: pslld $16, %xmm1
578578
; SSE-NEXT: psrad $16, %xmm1
579-
; SSE-NEXT: packssdw %xmm6, %xmm1
580-
; SSE-NEXT: psrad $16, %xmm14
581-
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
582-
; SSE-NEXT: psrad $16, %xmm0
583-
; SSE-NEXT: packssdw %xmm14, %xmm0
584-
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
579+
; SSE-NEXT: packssdw %xmm4, %xmm1
585580
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
586581
; SSE-NEXT: psrad $16, %xmm0
587-
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
588-
; SSE-NEXT: psrad $16, %xmm6
589-
; SSE-NEXT: packssdw %xmm0, %xmm6
590-
; SSE-NEXT: movdqa %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
582+
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
583+
; SSE-NEXT: psrad $16, %xmm4
584+
; SSE-NEXT: packssdw %xmm0, %xmm4
585+
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
591586
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
592587
; SSE-NEXT: psrad $16, %xmm0
593-
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
594-
; SSE-NEXT: psrad $16, %xmm14
595-
; SSE-NEXT: packssdw %xmm0, %xmm14
596-
; SSE-NEXT: psrad $16, %xmm3
597-
; SSE-NEXT: psrad $16, %xmm5
598-
; SSE-NEXT: packssdw %xmm3, %xmm5
588+
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
589+
; SSE-NEXT: psrad $16, %xmm4
590+
; SSE-NEXT: packssdw %xmm0, %xmm4
591+
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
599592
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
600593
; SSE-NEXT: psrad $16, %xmm0
601-
; SSE-NEXT: movdqa (%rsp), %xmm6 # 16-byte Reload
602-
; SSE-NEXT: psrad $16, %xmm6
603-
; SSE-NEXT: packssdw %xmm0, %xmm6
604-
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
594+
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
595+
; SSE-NEXT: psrad $16, %xmm4
596+
; SSE-NEXT: packssdw %xmm0, %xmm4
597+
; SSE-NEXT: movdqa %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
598+
; SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
605599
; SSE-NEXT: psrad $16, %xmm0
606-
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 16-byte Reload
607-
; SSE-NEXT: psrad $16, %xmm3
608-
; SSE-NEXT: packssdw %xmm0, %xmm3
600+
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm4 # 16-byte Reload
601+
; SSE-NEXT: psrad $16, %xmm4
602+
; SSE-NEXT: packssdw %xmm0, %xmm4
603+
; SSE-NEXT: psrad $16, %xmm7
604+
; SSE-NEXT: psrad $16, %xmm8
605+
; SSE-NEXT: packssdw %xmm7, %xmm8
609606
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
610607
; SSE-NEXT: psrad $16, %xmm0
608+
; SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
611609
; SSE-NEXT: psrad $16, %xmm7
612610
; SSE-NEXT: packssdw %xmm0, %xmm7
613-
; SSE-NEXT: psrad $16, %xmm8
611+
; SSE-NEXT: psrad $16, %xmm6
614612
; SSE-NEXT: psrad $16, %xmm2
615-
; SSE-NEXT: packssdw %xmm8, %xmm2
613+
; SSE-NEXT: packssdw %xmm6, %xmm2
614+
; SSE-NEXT: psrad $16, %xmm12
615+
; SSE-NEXT: psrad $16, %xmm5
616+
; SSE-NEXT: packssdw %xmm12, %xmm5
616617
; SSE-NEXT: movdqa %xmm1, 96(%rsi)
617-
; SSE-NEXT: movdqa %xmm4, 32(%rsi)
618-
; SSE-NEXT: movdqa %xmm15, 112(%rsi)
619-
; SSE-NEXT: movdqa %xmm13, 48(%rsi)
620-
; SSE-NEXT: movdqa %xmm11, 64(%rsi)
621-
; SSE-NEXT: movdqa %xmm9, (%rsi)
622-
; SSE-NEXT: movdqa %xmm10, 80(%rsi)
623-
; SSE-NEXT: movdqa %xmm12, 16(%rsi)
618+
; SSE-NEXT: movdqa %xmm3, 112(%rsi)
619+
; SSE-NEXT: movdqa %xmm15, 64(%rsi)
620+
; SSE-NEXT: movdqa %xmm14, 80(%rsi)
621+
; SSE-NEXT: movdqa %xmm11, 32(%rsi)
622+
; SSE-NEXT: movdqa %xmm9, 48(%rsi)
623+
; SSE-NEXT: movdqa %xmm10, (%rsi)
624+
; SSE-NEXT: movdqa %xmm13, 16(%rsi)
625+
; SSE-NEXT: movdqa %xmm5, 112(%rdx)
624626
; SSE-NEXT: movdqa %xmm2, 96(%rdx)
625-
; SSE-NEXT: movdqa %xmm7, 112(%rdx)
626-
; SSE-NEXT: movdqa %xmm3, 64(%rdx)
627-
; SSE-NEXT: movdqa %xmm6, 80(%rdx)
628-
; SSE-NEXT: movdqa %xmm5, 32(%rdx)
629-
; SSE-NEXT: movdqa %xmm14, 48(%rdx)
627+
; SSE-NEXT: movdqa %xmm7, 80(%rdx)
628+
; SSE-NEXT: movdqa %xmm8, 64(%rdx)
629+
; SSE-NEXT: movdqa %xmm4, 48(%rdx)
630630
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
631-
; SSE-NEXT: movaps %xmm0, (%rdx)
631+
; SSE-NEXT: movaps %xmm0, 32(%rdx)
632632
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
633633
; SSE-NEXT: movaps %xmm0, 16(%rdx)
634+
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
635+
; SSE-NEXT: movaps %xmm0, (%rdx)
634636
; SSE-NEXT: addq $40, %rsp
635637
; SSE-NEXT: retq
636638
;

0 commit comments

Comments
 (0)