@@ -347,19 +347,19 @@ define void @test_mscatter_v17f32(float* %base, <17 x i32> %index, <17 x float>
347
347
; WIDEN_AVX2-LABEL: test_mscatter_v17f32:
348
348
; WIDEN_AVX2: # %bb.0:
349
349
; WIDEN_AVX2-NEXT: vmovq %rdi, %xmm8
350
- ; WIDEN_AVX2-NEXT: vpbroadcastq %xmm8, %ymm9
351
- ; WIDEN_AVX2-NEXT: vmovd %esi, %xmm10
352
- ; WIDEN_AVX2-NEXT: vpinsrd $1, %edx, %xmm10 , %xmm10
353
- ; WIDEN_AVX2-NEXT: vpinsrd $2, %ecx, %xmm10 , %xmm10
354
- ; WIDEN_AVX2-NEXT: vpinsrd $3, %r8d, %xmm10 , %xmm10
355
- ; WIDEN_AVX2-NEXT: vpmovsxdq %xmm10 , %ymm10
356
- ; WIDEN_AVX2-NEXT: vpsllq $2, %ymm10 , %ymm10
357
- ; WIDEN_AVX2-NEXT: vpaddq %ymm10 , %ymm9 , %ymm10
358
- ; WIDEN_AVX2-NEXT: vmovq %xmm10 , %rax
350
+ ; WIDEN_AVX2-NEXT: vpbroadcastq %xmm8, %ymm8
351
+ ; WIDEN_AVX2-NEXT: vmovd %esi, %xmm9
352
+ ; WIDEN_AVX2-NEXT: vpinsrd $1, %edx, %xmm9 , %xmm9
353
+ ; WIDEN_AVX2-NEXT: vpinsrd $2, %ecx, %xmm9 , %xmm9
354
+ ; WIDEN_AVX2-NEXT: vpinsrd $3, %r8d, %xmm9 , %xmm9
355
+ ; WIDEN_AVX2-NEXT: vpmovsxdq %xmm9 , %ymm9
356
+ ; WIDEN_AVX2-NEXT: vpsllq $2, %ymm9 , %ymm9
357
+ ; WIDEN_AVX2-NEXT: vpaddq %ymm9 , %ymm8 , %ymm9
358
+ ; WIDEN_AVX2-NEXT: vmovq %xmm9 , %rax
359
359
; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
360
- ; WIDEN_AVX2-NEXT: vpextrq $1, %xmm10 , %rax
360
+ ; WIDEN_AVX2-NEXT: vpextrq $1, %xmm9 , %rax
361
361
; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax)
362
- ; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm10 , %xmm0
362
+ ; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm9 , %xmm0
363
363
; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax
364
364
; WIDEN_AVX2-NEXT: vmovss %xmm2, (%rax)
365
365
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax
@@ -370,7 +370,7 @@ define void @test_mscatter_v17f32(float* %base, <17 x i32> %index, <17 x float>
370
370
; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0
371
371
; WIDEN_AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
372
372
; WIDEN_AVX2-NEXT: vpsllq $2, %ymm0, %ymm0
373
- ; WIDEN_AVX2-NEXT: vpaddq %ymm0, %ymm9 , %ymm0
373
+ ; WIDEN_AVX2-NEXT: vpaddq %ymm0, %ymm8 , %ymm0
374
374
; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax
375
375
; WIDEN_AVX2-NEXT: vmovss %xmm4, (%rax)
376
376
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax
@@ -379,45 +379,45 @@ define void @test_mscatter_v17f32(float* %base, <17 x i32> %index, <17 x float>
379
379
; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax
380
380
; WIDEN_AVX2-NEXT: vmovss %xmm6, (%rax)
381
381
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax
382
- ; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
383
- ; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm0, %xmm0
384
- ; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm0, %xmm0
385
- ; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm0, %xmm0
386
- ; WIDEN_AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
387
- ; WIDEN_AVX2-NEXT: vpsllq $2, %ymm0, %ymm0
388
- ; WIDEN_AVX2-NEXT: vpaddq %ymm0, %ymm9, %ymm0
382
+ ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
389
383
; WIDEN_AVX2-NEXT: vmovss %xmm7, (%rax)
390
384
; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
391
385
; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1
392
386
; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1
393
387
; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1
394
388
; WIDEN_AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
395
389
; WIDEN_AVX2-NEXT: vpsllq $2, %ymm1, %ymm1
396
- ; WIDEN_AVX2-NEXT: vpaddq %ymm1, %ymm9, %ymm1
397
- ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
390
+ ; WIDEN_AVX2-NEXT: vpaddq %ymm1, %ymm8, %ymm1
398
391
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
399
- ; WIDEN_AVX2-NEXT: vmovss %xmm2 , (%rax)
400
- ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
392
+ ; WIDEN_AVX2-NEXT: vmovss %xmm0 , (%rax)
393
+ ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
401
394
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
402
- ; WIDEN_AVX2-NEXT: vmovss %xmm2 , (%rax)
403
- ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
395
+ ; WIDEN_AVX2-NEXT: vmovss %xmm0 , (%rax)
396
+ ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
404
397
; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
405
398
; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
406
- ; WIDEN_AVX2-NEXT: vmovss %xmm2, (%rax)
399
+ ; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
400
+ ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
407
401
; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
408
402
; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
409
403
; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax)
410
- ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
411
- ; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax
412
- ; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax)
413
- ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
414
- ; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax
415
- ; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax)
416
- ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
417
- ; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
418
- ; WIDEN_AVX2-NEXT: vmovq %xmm0, %rax
419
- ; WIDEN_AVX2-NEXT: vmovss %xmm1, (%rax)
420
- ; WIDEN_AVX2-NEXT: vpextrq $1, %xmm0, %rax
404
+ ; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
405
+ ; WIDEN_AVX2-NEXT: vpinsrd $1, {{[0-9]+}}(%rsp), %xmm1, %xmm1
406
+ ; WIDEN_AVX2-NEXT: vpinsrd $2, {{[0-9]+}}(%rsp), %xmm1, %xmm1
407
+ ; WIDEN_AVX2-NEXT: vpinsrd $3, {{[0-9]+}}(%rsp), %xmm1, %xmm1
408
+ ; WIDEN_AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
409
+ ; WIDEN_AVX2-NEXT: vpsllq $2, %ymm1, %ymm1
410
+ ; WIDEN_AVX2-NEXT: vpaddq %ymm1, %ymm8, %ymm1
411
+ ; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
412
+ ; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
413
+ ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
414
+ ; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
415
+ ; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
416
+ ; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
417
+ ; WIDEN_AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
418
+ ; WIDEN_AVX2-NEXT: vmovq %xmm1, %rax
419
+ ; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
420
+ ; WIDEN_AVX2-NEXT: vpextrq $1, %xmm1, %rax
421
421
; WIDEN_AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
422
422
; WIDEN_AVX2-NEXT: vmovss %xmm0, (%rax)
423
423
; WIDEN_AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
0 commit comments