-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Fold llvm.amdgcn.cvt.pkrtz when either operand is fpext #108237
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1161,6 +1161,85 @@ define <2 x half> @constant_rtz_pkrtz() { | |
ret <2 x half> %cvt | ||
} | ||
|
||
; Operand 0 is an fpext from half; operand 1 is the constant 3.0. | ||
; Expected: the call is replaced by inserting %x into lane 0 of a constant | ||
; vector whose lane 1 is half 0xH4200, with no fpext or call left. | ||
define <2 x half> @fpext_const_cvt_pkrtz(half %x) { | ||
; CHECK-LABEL: @fpext_const_cvt_pkrtz( | ||
; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half poison, half 0xH4200>, half [[X:%.*]], i64 0 | ||
; CHECK-NEXT: ret <2 x half> [[CVT]] | ||
; | ||
%ext = fpext half %x to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float 3.0) | ||
ret <2 x half> %cvt | ||
} | ||
|
||
; Operand 0 is the constant 5.0; operand 1 is an fpext from half. | ||
; Expected: the call is replaced by inserting %y into lane 1 of a constant | ||
; vector whose lane 0 is half 0xH4500, with no fpext or call left. | ||
define <2 x half> @const_fpext_cvt_pkrtz(half %y) { | ||
; CHECK-LABEL: @const_fpext_cvt_pkrtz( | ||
; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1 | ||
; CHECK-NEXT: ret <2 x half> [[CVT]] | ||
; | ||
%ext = fpext half %y to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext) | ||
ret <2 x half> %cvt | ||
} | ||
|
||
; Multi-use case: a single fpext from half feeds operand 1 of two calls | ||
; (with constants 5.0 and 3.0 in operand 0). | ||
; Expected: both calls become insertelement of %y into lane 1 of a constant | ||
; vector, the fpext disappears, and the fadd of the two results is kept. | ||
define <2 x half> @const_fpext_multi_cvt_pkrtz(half %y) { | ||
; CHECK-LABEL: @const_fpext_multi_cvt_pkrtz( | ||
; CHECK-NEXT: [[CVT1:%.*]] = insertelement <2 x half> <half 0xH4500, half poison>, half [[Y:%.*]], i64 1 | ||
; CHECK-NEXT: [[CVT2:%.*]] = insertelement <2 x half> <half 0xH4200, half poison>, half [[Y]], i64 1 | ||
; CHECK-NEXT: [[ADD:%.*]] = fadd <2 x half> [[CVT1]], [[CVT2]] | ||
; CHECK-NEXT: ret <2 x half> [[ADD]] | ||
; | ||
%ext = fpext half %y to float | ||
%cvt1 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 5.0, float %ext) | ||
%cvt2 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 3.0, float %ext) | ||
%add = fadd <2 x half> %cvt1, %cvt2 | ||
ret <2 x half> %add | ||
} | ||
|
||
; Both operands are fpext from half. | ||
; Expected: the call is replaced by two insertelements that build the result | ||
; directly from %x (lane 0) and %y (lane 1), starting from a poison vector. | ||
define <2 x half> @fpext_fpext_cvt_pkrtz(half %x, half %y) { | ||
; CHECK-LABEL: @fpext_fpext_cvt_pkrtz( | ||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0 | ||
; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> [[TMP1]], half [[Y:%.*]], i64 1 | ||
; CHECK-NEXT: ret <2 x half> [[CVT]] | ||
; | ||
%extx = fpext half %x to float | ||
%exty = fpext half %y to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty) | ||
ret <2 x half> %cvt | ||
} | ||
|
||
; Negative test: both operands are fpext from bfloat rather than half. | ||
; Expected: nothing is folded; both fpexts and the intrinsic call remain. | ||
define <2 x half> @fpext_fpext_bf16_cvt_pkrtz(bfloat %x, bfloat %y) { | ||
; CHECK-LABEL: @fpext_fpext_bf16_cvt_pkrtz( | ||
; CHECK-NEXT: [[EXTX:%.*]] = fpext bfloat [[X:%.*]] to float | ||
; CHECK-NEXT: [[EXTY:%.*]] = fpext bfloat [[Y:%.*]] to float | ||
; CHECK-NEXT: [[CVT:%.*]] = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float [[EXTX]], float [[EXTY]]) | ||
; CHECK-NEXT: ret <2 x half> [[CVT]] | ||
; | ||
%extx = fpext bfloat %x to float | ||
%exty = fpext bfloat %y to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %extx, float %exty) | ||
ret <2 x half> %cvt | ||
} | ||
|
||
; Operand 0 is poison; operand 1 is an fpext from half. | ||
; Expected: the call is replaced by inserting %y into lane 1 of a poison | ||
; vector, with no fpext or call left. | ||
define <2 x half> @poison_fpext_cvt_pkrtz(half %y) { | ||
; CHECK-LABEL: @poison_fpext_cvt_pkrtz( | ||
; CHECK-NEXT: [[CVT:%.*]] = insertelement <2 x half> poison, half [[Y:%.*]], i64 1 | ||
; CHECK-NEXT: ret <2 x half> [[CVT]] | ||
; | ||
%ext = fpext half %y to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float poison, float %ext) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also test poison on RHS |
||
ret <2 x half> %cvt | ||
} | ||
|
||
Review comment: Add some tests with bfloat sources. Also add a negative multi-use test. |
Reply: bfloat tests would be negative, since the intrinsic only supports half. As for multi-use tests, I'm not sure whether they should be negative. I know I used […] (comment truncated in this view) |
Reply: Yes, negative. |
||
; Operand 0 is an fpext from half; operand 1 is poison. | ||
; Expected: the call is replaced by inserting %x into lane 0 of a poison | ||
; vector, with no fpext or call left. | ||
define <2 x half> @fpext_poison_cvt_pkrtz(half %x) { | ||
; CHECK-LABEL: @fpext_poison_cvt_pkrtz( | ||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> poison, half [[X:%.*]], i64 0 | ||
; CHECK-NEXT: ret <2 x half> [[TMP1]] | ||
; | ||
%ext = fpext half %x to float | ||
%cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %ext, float poison) | ||
ret <2 x half> %cvt | ||
} | ||
|
||
; -------------------------------------------------------------------- | ||
; llvm.amdgcn.cvt.pknorm.i16 | ||
; -------------------------------------------------------------------- | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Usually we fold undef to a qNaN in FP folds (although I think this is overly conservative; I assume it is only meant to exclude sNaN bit patterns, which aren't guaranteed to be quieted anyway).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was following `ConstantFoldCastInstruction`, which folds `(fptrunc undef)` to `undef`.