@@ -813,4 +813,102 @@ bb7:
813
813
ret void
814
814
815
815
}
816
+
817
+ define amdgpu_kernel void @test_umul_i24 (ptr addrspace (1 ) %out , i32 %arg ) {
818
+ ; SI-LABEL: test_umul_i24:
819
+ ; SI: ; %bb.0:
820
+ ; SI-NEXT: s_load_dword s1, s[2:3], 0xb
821
+ ; SI-NEXT: v_mov_b32_e32 v0, 0xff803fe1
822
+ ; SI-NEXT: s_mov_b32 s0, 0
823
+ ; SI-NEXT: s_mov_b32 s3, 0xf000
824
+ ; SI-NEXT: s_waitcnt lgkmcnt(0)
825
+ ; SI-NEXT: s_lshr_b32 s1, s1, 9
826
+ ; SI-NEXT: v_mul_hi_u32 v0, s1, v0
827
+ ; SI-NEXT: s_mul_i32 s1, s1, 0xff803fe1
828
+ ; SI-NEXT: v_alignbit_b32 v0, v0, s1, 1
829
+ ; SI-NEXT: s_mov_b32 s2, -1
830
+ ; SI-NEXT: s_mov_b32 s1, s0
831
+ ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
832
+ ; SI-NEXT: s_endpgm
833
+ ;
834
+ ; VI-LABEL: test_umul_i24:
835
+ ; VI: ; %bb.0:
836
+ ; VI-NEXT: s_load_dword s0, s[2:3], 0x2c
837
+ ; VI-NEXT: v_mov_b32_e32 v0, 0xff803fe1
838
+ ; VI-NEXT: s_mov_b32 s3, 0xf000
839
+ ; VI-NEXT: s_mov_b32 s2, -1
840
+ ; VI-NEXT: s_waitcnt lgkmcnt(0)
841
+ ; VI-NEXT: s_lshr_b32 s0, s0, 9
842
+ ; VI-NEXT: v_mad_u64_u32 v[0:1], s[0:1], s0, v0, 0
843
+ ; VI-NEXT: s_mov_b32 s0, 0
844
+ ; VI-NEXT: s_mov_b32 s1, s0
845
+ ; VI-NEXT: v_alignbit_b32 v0, v1, v0, 1
846
+ ; VI-NEXT: s_nop 1
847
+ ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
848
+ ; VI-NEXT: s_endpgm
849
+ ;
850
+ ; GFX9-LABEL: test_umul_i24:
851
+ ; GFX9: ; %bb.0:
852
+ ; GFX9-NEXT: s_load_dword s1, s[2:3], 0x2c
853
+ ; GFX9-NEXT: s_mov_b32 s0, 0
854
+ ; GFX9-NEXT: s_mov_b32 s3, 0xf000
855
+ ; GFX9-NEXT: s_mov_b32 s2, -1
856
+ ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
857
+ ; GFX9-NEXT: s_lshr_b32 s1, s1, 9
858
+ ; GFX9-NEXT: s_mul_hi_u32 s4, s1, 0xff803fe1
859
+ ; GFX9-NEXT: s_mul_i32 s1, s1, 0xff803fe1
860
+ ; GFX9-NEXT: v_mov_b32_e32 v0, s1
861
+ ; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, 1
862
+ ; GFX9-NEXT: s_mov_b32 s1, s0
863
+ ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
864
+ ; GFX9-NEXT: s_endpgm
865
+ ;
866
+ ; EG-LABEL: test_umul_i24:
867
+ ; EG: ; %bb.0:
868
+ ; EG-NEXT: ALU 8, @4, KC0[CB0:0-32], KC1[]
869
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
870
+ ; EG-NEXT: CF_END
871
+ ; EG-NEXT: PAD
872
+ ; EG-NEXT: ALU clause starting at 4:
873
+ ; EG-NEXT: LSHR * T0.W, KC0[2].Z, literal.x,
874
+ ; EG-NEXT: 9(1.261169e-44), 0(0.000000e+00)
875
+ ; EG-NEXT: MULHI * T0.X, PV.W, literal.x,
876
+ ; EG-NEXT: -8372255(nan), 0(0.000000e+00)
877
+ ; EG-NEXT: MULLO_INT * T0.Y, T0.W, literal.x,
878
+ ; EG-NEXT: -8372255(nan), 0(0.000000e+00)
879
+ ; EG-NEXT: BIT_ALIGN_INT T0.X, T0.X, PS, 1,
880
+ ; EG-NEXT: MOV * T1.X, literal.x,
881
+ ; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
882
+ ;
883
+ ; CM-LABEL: test_umul_i24:
884
+ ; CM: ; %bb.0:
885
+ ; CM-NEXT: ALU 14, @4, KC0[CB0:0-32], KC1[]
886
+ ; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
887
+ ; CM-NEXT: CF_END
888
+ ; CM-NEXT: PAD
889
+ ; CM-NEXT: ALU clause starting at 4:
890
+ ; CM-NEXT: LSHR * T0.W, KC0[2].Z, literal.x,
891
+ ; CM-NEXT: 9(1.261169e-44), 0(0.000000e+00)
892
+ ; CM-NEXT: MULHI T0.X, T0.W, literal.x,
893
+ ; CM-NEXT: MULHI T0.Y (MASKED), T0.W, literal.x,
894
+ ; CM-NEXT: MULHI T0.Z (MASKED), T0.W, literal.x,
895
+ ; CM-NEXT: MULHI * T0.W (MASKED), T0.W, literal.x,
896
+ ; CM-NEXT: -8372255(nan), 0(0.000000e+00)
897
+ ; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, literal.x,
898
+ ; CM-NEXT: MULLO_INT T0.Y, T0.W, literal.x,
899
+ ; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, literal.x,
900
+ ; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, literal.x,
901
+ ; CM-NEXT: -8372255(nan), 0(0.000000e+00)
902
+ ; CM-NEXT: BIT_ALIGN_INT * T0.X, T0.X, PV.Y, 1,
903
+ ; CM-NEXT: MOV * T1.X, literal.x,
904
+ ; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00)
905
+ %i = lshr i32 %arg , 9
906
+ %i1 = zext i32 %i to i64
907
+ %i2 = mul i64 %i1 , 4286595041
908
+ %i3 = lshr i64 %i2 , 1
909
+ %i4 = trunc i64 %i3 to i32
910
+ store i32 %i4 , ptr addrspace (1 ) null , align 4
911
+ ret void
912
+ }
913
+
816
914
attributes #0 = { nounwind }
0 commit comments