@@ -3077,11 +3077,9 @@ define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3077
3077
; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
3078
3078
; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
3079
3079
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3080
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3081
- ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
3082
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
3083
3080
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3084
- ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
3081
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
3082
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
3085
3083
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3086
3084
; GFX6-NEXT: s_setpc_b64 s[30:31]
3087
3085
;
@@ -3235,9 +3233,7 @@ define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt)
3235
3233
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3236
3234
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3237
3235
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3238
- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3239
3236
; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
3240
- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3241
3237
; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
3242
3238
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3243
3239
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
@@ -3570,26 +3566,22 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3570
3566
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
3571
3567
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3572
3568
; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
3569
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
3573
3570
; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4
3574
3571
; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
3575
3572
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3576
- ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
3577
3573
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3578
- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
3579
3574
; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3580
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3581
3575
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
3582
3576
; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3577
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
3583
3578
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3584
3579
; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
3585
3580
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3586
- ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
3587
3581
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3588
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3589
3582
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
3590
3583
; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3591
- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
3592
- ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3584
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3593
3585
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3594
3586
; GFX6-NEXT: s_setpc_b64 s[30:31]
3595
3587
;
@@ -3735,32 +3727,28 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %
3735
3727
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3736
3728
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
3737
3729
; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001
3738
- ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3739
3730
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3740
3731
; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3741
- ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3742
- ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3743
3732
; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3744
3733
; GFX6-NEXT: s_or_b32 s0, s0, s4
3745
3734
; GFX6-NEXT: s_lshl_b32 s2, s2, 1
3735
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3736
+ ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3737
+ ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3746
3738
; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3747
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3748
3739
; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
3749
3740
; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
3750
- ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3751
- ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3752
3741
; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001
3753
- ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3754
- ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3755
- ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3742
+ ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3756
3743
; GFX6-NEXT: s_lshl_b32 s1, s1, 1
3757
3744
; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3758
3745
; GFX6-NEXT: s_lshl_b32 s3, s3, 1
3759
- ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3746
+ ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3747
+ ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3748
+ ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3760
3749
; GFX6-NEXT: s_or_b32 s1, s1, s4
3761
- ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3750
+ ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3762
3751
; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
3763
- ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3764
3752
; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
3765
3753
; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3766
3754
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
@@ -4358,26 +4346,22 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4358
4346
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
4359
4347
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4360
4348
; GFX6-NEXT: v_or_b32_e32 v1, v1, v8
4349
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
4361
4350
; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6
4362
4351
; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
4363
4352
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4364
- ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
4365
4353
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4366
- ; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
4367
4354
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4368
- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4369
4355
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
4370
4356
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4357
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4371
4358
; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
4372
4359
; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
4373
4360
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
4374
- ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4375
4361
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4376
- ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4377
4362
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
4378
4363
; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4379
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4380
- ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4364
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4381
4365
; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4382
4366
; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
4383
4367
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
@@ -4388,9 +4372,7 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4388
4372
; GFX6-NEXT: v_and_b32_e32 v5, 15, v4
4389
4373
; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
4390
4374
; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4391
- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
4392
4375
; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4393
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4394
4376
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2
4395
4377
; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4396
4378
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -4782,26 +4764,22 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4782
4764
; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
4783
4765
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4784
4766
; GFX6-NEXT: v_or_b32_e32 v1, v1, v10
4767
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4785
4768
; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8
4786
4769
; GFX6-NEXT: v_and_b32_e32 v11, 15, v8
4787
4770
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4788
- ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4789
4771
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4790
- ; GFX6-NEXT: v_and_b32_e32 v11, 0xffff, v11
4791
4772
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4792
- ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4793
4773
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0
4794
4774
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
4775
+ ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
4795
4776
; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
4796
4777
; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
4797
4778
; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
4798
- ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
4799
4779
; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4800
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4801
4780
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
4802
4781
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4803
- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
4804
- ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4782
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
4805
4783
; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
4806
4784
; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
4807
4785
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
@@ -4818,20 +4796,16 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4818
4796
; GFX6-NEXT: v_and_b32_e32 v8, 15, v6
4819
4797
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4820
4798
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4821
- ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4822
4799
; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4823
- ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4824
4800
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
4825
4801
; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
4826
4802
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
4827
4803
; GFX6-NEXT: v_and_b32_e32 v4, 15, v7
4828
4804
; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
4829
4805
; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4830
- ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4831
4806
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
4832
4807
; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4833
- ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6
4834
- ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4808
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
4835
4809
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
4836
4810
; GFX6-NEXT: s_setpc_b64 s[30:31]
4837
4811
;
0 commit comments