@@ -516,23 +516,14 @@ define void @compressstore_v16f64_v16i1(ptr %base, <16 x double> %V, <16 x i1> %
516
516
; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
517
517
; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
518
518
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
519
- ; AVX512F-NEXT: kmovw %k1, %eax
520
- ; AVX512F-NEXT: movzbl %al, %ecx
521
- ; AVX512F-NEXT: shrl %eax
522
- ; AVX512F-NEXT: andl $85, %eax
523
- ; AVX512F-NEXT: subl %eax, %ecx
524
- ; AVX512F-NEXT: movl %ecx, %eax
525
- ; AVX512F-NEXT: andl $858993459, %eax ## imm = 0x33333333
526
- ; AVX512F-NEXT: shrl $2, %ecx
527
- ; AVX512F-NEXT: andl $858993459, %ecx ## imm = 0x33333333
528
- ; AVX512F-NEXT: addl %eax, %ecx
529
- ; AVX512F-NEXT: movl %ecx, %eax
530
- ; AVX512F-NEXT: shrl $4, %eax
531
- ; AVX512F-NEXT: addl %ecx, %eax
532
- ; AVX512F-NEXT: andl $252645135, %eax ## imm = 0xF0F0F0F
533
- ; AVX512F-NEXT: imull $16843009, %eax, %eax ## imm = 0x1010101
534
- ; AVX512F-NEXT: shrl $24, %eax
535
519
; AVX512F-NEXT: kshiftrw $8, %k1, %k2
520
+ ; AVX512F-NEXT: kmovw %k1, %eax
521
+ ; AVX512F-NEXT: movzbl %al, %eax
522
+ ; AVX512F-NEXT: imull $134480385, %eax, %eax ## imm = 0x8040201
523
+ ; AVX512F-NEXT: shrl $3, %eax
524
+ ; AVX512F-NEXT: andl $286331153, %eax ## imm = 0x11111111
525
+ ; AVX512F-NEXT: imull $286331153, %eax, %eax ## imm = 0x11111111
526
+ ; AVX512F-NEXT: shrl $28, %eax
536
527
; AVX512F-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
537
528
; AVX512F-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
538
529
; AVX512F-NEXT: vzeroupper
@@ -543,23 +534,13 @@ define void @compressstore_v16f64_v16i1(ptr %base, <16 x double> %V, <16 x i1> %
543
534
; AVX512VLDQ-NEXT: vpmovsxbd %xmm2, %zmm2
544
535
; AVX512VLDQ-NEXT: vpslld $31, %zmm2, %zmm2
545
536
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k1
546
- ; AVX512VLDQ-NEXT: kmovb %k1, %eax
547
- ; AVX512VLDQ-NEXT: movl %eax, %ecx
548
- ; AVX512VLDQ-NEXT: shrl %ecx
549
- ; AVX512VLDQ-NEXT: andl $-43, %ecx
550
- ; AVX512VLDQ-NEXT: subl %ecx, %eax
551
- ; AVX512VLDQ-NEXT: movl %eax, %ecx
552
- ; AVX512VLDQ-NEXT: andl $858993459, %ecx ## imm = 0x33333333
553
- ; AVX512VLDQ-NEXT: shrl $2, %eax
554
- ; AVX512VLDQ-NEXT: andl $858993459, %eax ## imm = 0x33333333
555
- ; AVX512VLDQ-NEXT: addl %ecx, %eax
556
- ; AVX512VLDQ-NEXT: movl %eax, %ecx
557
- ; AVX512VLDQ-NEXT: shrl $4, %ecx
558
- ; AVX512VLDQ-NEXT: addl %eax, %ecx
559
- ; AVX512VLDQ-NEXT: andl $252645135, %ecx ## imm = 0xF0F0F0F
560
- ; AVX512VLDQ-NEXT: imull $16843009, %ecx, %eax ## imm = 0x1010101
561
- ; AVX512VLDQ-NEXT: shrl $24, %eax
562
537
; AVX512VLDQ-NEXT: kshiftrw $8, %k1, %k2
538
+ ; AVX512VLDQ-NEXT: kmovb %k1, %eax
539
+ ; AVX512VLDQ-NEXT: imull $134480385, %eax, %eax ## imm = 0x8040201
540
+ ; AVX512VLDQ-NEXT: shrl $3, %eax
541
+ ; AVX512VLDQ-NEXT: andl $286331153, %eax ## imm = 0x11111111
542
+ ; AVX512VLDQ-NEXT: imull $286331153, %eax, %eax ## imm = 0x11111111
543
+ ; AVX512VLDQ-NEXT: shrl $28, %eax
563
544
; AVX512VLDQ-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
564
545
; AVX512VLDQ-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
565
546
; AVX512VLDQ-NEXT: vzeroupper
@@ -569,23 +550,14 @@ define void @compressstore_v16f64_v16i1(ptr %base, <16 x double> %V, <16 x i1> %
569
550
; AVX512VLBW: ## %bb.0:
570
551
; AVX512VLBW-NEXT: vpsllw $7, %xmm2, %xmm2
571
552
; AVX512VLBW-NEXT: vpmovb2m %xmm2, %k1
572
- ; AVX512VLBW-NEXT: kmovd %k1, %eax
573
- ; AVX512VLBW-NEXT: movzbl %al, %ecx
574
- ; AVX512VLBW-NEXT: shrl %eax
575
- ; AVX512VLBW-NEXT: andl $85, %eax
576
- ; AVX512VLBW-NEXT: subl %eax, %ecx
577
- ; AVX512VLBW-NEXT: movl %ecx, %eax
578
- ; AVX512VLBW-NEXT: andl $858993459, %eax ## imm = 0x33333333
579
- ; AVX512VLBW-NEXT: shrl $2, %ecx
580
- ; AVX512VLBW-NEXT: andl $858993459, %ecx ## imm = 0x33333333
581
- ; AVX512VLBW-NEXT: addl %eax, %ecx
582
- ; AVX512VLBW-NEXT: movl %ecx, %eax
583
- ; AVX512VLBW-NEXT: shrl $4, %eax
584
- ; AVX512VLBW-NEXT: addl %ecx, %eax
585
- ; AVX512VLBW-NEXT: andl $252645135, %eax ## imm = 0xF0F0F0F
586
- ; AVX512VLBW-NEXT: imull $16843009, %eax, %eax ## imm = 0x1010101
587
- ; AVX512VLBW-NEXT: shrl $24, %eax
588
553
; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k2
554
+ ; AVX512VLBW-NEXT: kmovd %k1, %eax
555
+ ; AVX512VLBW-NEXT: movzbl %al, %eax
556
+ ; AVX512VLBW-NEXT: imull $134480385, %eax, %eax ## imm = 0x8040201
557
+ ; AVX512VLBW-NEXT: shrl $3, %eax
558
+ ; AVX512VLBW-NEXT: andl $286331153, %eax ## imm = 0x11111111
559
+ ; AVX512VLBW-NEXT: imull $286331153, %eax, %eax ## imm = 0x11111111
560
+ ; AVX512VLBW-NEXT: shrl $28, %eax
589
561
; AVX512VLBW-NEXT: vcompresspd %zmm1, (%rdi,%rax,8) {%k2}
590
562
; AVX512VLBW-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
591
563
; AVX512VLBW-NEXT: vzeroupper
0 commit comments