-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][True16][CodeGen] select vgpr16 for asm inline 16bit vreg #140946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s | ||
Review comment: Can you auto-generate the check lines? Reply: Sure. |
||
|
||
; An i16 is produced by inline asm through the "=s" constraint and consumed by a second inline asm through "s". | ||
; The checks expect an s_and_b32 with 0xffff between the two asm blocks. | ||
define amdgpu_kernel void @s_input_output_i16() #0 { | ||
; GFX11-LABEL: s_input_output_i16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: s_mov_b32 s0, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_and_b32 s0, s0, 0xffff | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use s0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() | ||
tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 | ||
ret void | ||
} | ||
|
||
; A half is produced by inline asm through the "=s" constraint and consumed by a second inline asm through "s". | ||
; The checks expect the two asm blocks back to back, both using s0, with no instruction in between. | ||
define amdgpu_kernel void @s_input_output_f16() #0 { | ||
; GFX11-LABEL: s_input_output_f16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: s_mov_b32 s0, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use s0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 | ||
tail call void asm sideeffect "; use $0", "s"(half %v) | ||
ret void | ||
} | ||
|
||
; A half is produced by inline asm through the "=v" constraint and consumed by a second inline asm through "v". | ||
; The checks expect both asm blocks to print the operand as v0, with no instruction in between. | ||
define amdgpu_kernel void @v_input_output_f16() #0 { | ||
; GFX11-LABEL: v_input_output_f16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: v_mov_b32 v0, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 | ||
tail call void asm sideeffect "; use $0", "v"(half %v) | ||
ret void | ||
} | ||
|
||
; An i16 is produced by inline asm through the "=v" constraint and consumed by a second inline asm through "v". | ||
; The checks expect a v_and_b32_e32 with 0xffff on v0 between the two asm blocks. | ||
define amdgpu_kernel void @v_input_output_i16() #0 { | ||
; GFX11-LABEL: v_input_output_i16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: v_mov_b32 v0, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0 | ||
tail call void asm sideeffect "; use $0", "v"(i16 %v) | ||
ret void | ||
} | ||
|
||
; The i16 constant 65535 is passed to inline asm through the physical-register constraint "{v0}". | ||
; The checks expect v_mov_b32_e32 v0, 0xffff ahead of the asm block. | ||
define amdgpu_kernel void @i16_imm_input_phys_vgpr() { | ||
; GFX11-LABEL: i16_imm_input_phys_vgpr: | ||
; GFX11: ; %bb.0: ; %entry | ||
; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
entry: | ||
call void asm sideeffect "; use $0 ", "{v0}"(i16 65535) | ||
ret void | ||
} | ||
|
||
attributes #0 = { nounwind } |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s | ||
|
||
; An i16 is produced by inline asm through the "=s" constraint and consumed by a second inline asm through "s". | ||
; The checks expect an s_and_b32 with 0xffff between the two asm blocks. | ||
define amdgpu_kernel void @s_input_output_i16() #0 { | ||
; GFX11-LABEL: s_input_output_i16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: s_mov_b32 s0, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_and_b32 s0, s0, 0xffff | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use s0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"() | ||
tail call void asm sideeffect "; use $0", "s"(i16 %v) #0 | ||
ret void | ||
} | ||
|
||
; A half is produced by inline asm through the "=s" constraint and consumed by a second inline asm through "s". | ||
; The checks expect the two asm blocks back to back, both using s0, with no instruction in between. | ||
define amdgpu_kernel void @s_input_output_f16() #0 { | ||
; GFX11-LABEL: s_input_output_f16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: s_mov_b32 s0, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use s0 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0 | ||
tail call void asm sideeffect "; use $0", "s"(half %v) | ||
ret void | ||
} | ||
|
||
; A half is produced by inline asm (v_mov_b16) through the "=v" constraint and consumed by a second inline asm through "v". | ||
; The checks expect both asm blocks to print the operand as the 16-bit register v0.l. | ||
define amdgpu_kernel void @v_input_output_f16() #0 { | ||
; GFX11-LABEL: v_input_output_f16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: v_mov_b16 v0.l, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0.l | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call half asm sideeffect "v_mov_b16 $0, -1", "=v"() #0 | ||
tail call void asm sideeffect "; use $0", "v"(half %v) | ||
ret void | ||
} | ||
|
||
; An i16 is produced by inline asm (v_mov_b16) through the "=v" constraint and consumed by a second inline asm through "v". | ||
; The checks expect both asm blocks to print the operand as v0.l, with no instruction in between. | ||
define amdgpu_kernel void @v_input_output_i16() #0 { | ||
; GFX11-LABEL: v_input_output_i16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: v_mov_b16 v0.l, -1 | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0.l | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
%v = tail call i16 asm sideeffect "v_mov_b16 $0, -1", "=v"() #0 | ||
tail call void asm sideeffect "; use $0", "v"(i16 %v) | ||
ret void | ||
} | ||
|
||
; The i16 constant 65535 is passed to inline asm through the physical-register constraint "{v0.l}". | ||
; The checks expect v_mov_b16_e32 v0.l, -1 ahead of the asm block. | ||
define amdgpu_kernel void @i16_imm_input_phys_vgpr_lo() { | ||
; GFX11-LABEL: i16_imm_input_phys_vgpr_lo: | ||
; GFX11: ; %bb.0: ; %entry | ||
; GFX11-NEXT: v_mov_b16_e32 v0.l, -1 | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0.l | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
entry: | ||
call void asm sideeffect "; use $0 ", "{v0.l}"(i16 65535) | ||
Review comment: Can you please add one more test with v0.h?
Review comment: And do we need a constraint to specifically select the l or h half for a virtual register?
Reply: Added a .h case.
Reply: I do not think it is practically needed. At least it is not needed for correctness at this point. |
||
ret void | ||
} | ||
|
||
; The i16 constant 65535 is passed to inline asm through the physical-register constraint "{v0.h}". | ||
; The checks expect v_mov_b16_e32 v0.h, -1 ahead of the asm block. | ||
define amdgpu_kernel void @i16_imm_input_phys_vgpr_hi() { | ||
; GFX11-LABEL: i16_imm_input_phys_vgpr_hi: | ||
; GFX11: ; %bb.0: ; %entry | ||
; GFX11-NEXT: v_mov_b16_e32 v0.h, -1 | ||
; GFX11-NEXT: ;;#ASMSTART | ||
; GFX11-NEXT: ; use v0.h | ||
; GFX11-NEXT: ;;#ASMEND | ||
; GFX11-NEXT: s_endpgm | ||
entry: | ||
call void asm sideeffect "; use $0 ", "{v0.h}"(i16 65535) | ||
ret void | ||
} | ||
|
||
attributes #0 = { nounwind } |
Review comment: A `switch` seems overkill here. Could just handle it with: (the suggested code snippet was not captured in this extract)
Review comment: Or even move the handling for the `BitWidth == 16` case inside `getVGPRClassForBitWidth`?
Review comment: This reminds me that we need constraints for the aligned and unaligned versions of register classes.