Skip to content

Commit 225e671

Browse files
committed
[RISCV] Correct fcopysign pattern for zdinx
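The FPR64IN32X fcopysign pattern converted the f32 sign operand with FCVT_D_S_INX instead of FCVT_D_S_IN32X. Correcting the pattern fixes the following error: fatal error: error in backend: Cannot select: t17: f64 = fcopysign t5, t8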
1 parent cdfd4cf commit 225e671

File tree

2 files changed

+142
-1
lines changed

2 files changed

+142
-1
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoD.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ def : PatFprFpr<riscv_fsgnjx, FSGNJX_D_IN32X, FPR64IN32X, f64>;
363363
def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)),
364364
(FSGNJN_D_IN32X $rs1, $rs2)>;
365365
def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2),
366-
(FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2, FRM_RNE))>;
366+
(FSGNJ_D_IN32X $rs1, (FCVT_D_S_IN32X $rs2, FRM_RNE))>;
367367
def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2),
368368
(FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>;
369369

llvm/test/CodeGen/RISCV/copysign-casts.ll

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
2828
; RUN: -mattr=+zfhmin -target-abi lp64d < %s \
2929
; RUN: | FileCheck %s -check-prefix=RV64IFDZFHMIN
30+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+zdinx \
31+
; RUN: -target-abi ilp32 < %s \
32+
; RUN: | FileCheck %s -check-prefix=RV32IZDINX
33+
; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+zdinx \
34+
; RUN: -target-abi lp64 < %s \
35+
; RUN: | FileCheck %s -check-prefix=RV64IZDINX
3036

3137
; Test fcopysign scenarios where the sign argument is cast to the type of the
3238
; magnitude argument. Those casts can be folded away by the DAGCombiner.
@@ -120,6 +126,18 @@ define double @fold_promote_d_s(double %a, float %b) nounwind {
120126
; RV64IFDZFHMIN-NEXT: fcvt.d.s fa5, fa1
121127
; RV64IFDZFHMIN-NEXT: fsgnj.d fa0, fa0, fa5
122128
; RV64IFDZFHMIN-NEXT: ret
129+
;
130+
; RV32IZDINX-LABEL: fold_promote_d_s:
131+
; RV32IZDINX: # %bb.0:
132+
; RV32IZDINX-NEXT: fcvt.d.s a2, a2
133+
; RV32IZDINX-NEXT: fsgnj.d a0, a0, a2
134+
; RV32IZDINX-NEXT: ret
135+
;
136+
; RV64IZDINX-LABEL: fold_promote_d_s:
137+
; RV64IZDINX: # %bb.0:
138+
; RV64IZDINX-NEXT: fcvt.d.s a1, a1
139+
; RV64IZDINX-NEXT: fsgnj.d a0, a0, a1
140+
; RV64IZDINX-NEXT: ret
123141
%c = fpext float %b to double
124142
%t = call double @llvm.copysign.f64(double %a, double %c)
125143
ret double %t
@@ -232,6 +250,39 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
232250
; RV64IFDZFHMIN-NEXT: fcvt.d.h fa5, fa1
233251
; RV64IFDZFHMIN-NEXT: fsgnj.d fa0, fa0, fa5
234252
; RV64IFDZFHMIN-NEXT: ret
253+
;
254+
; RV32IZDINX-LABEL: fold_promote_d_h:
255+
; RV32IZDINX: # %bb.0:
256+
; RV32IZDINX-NEXT: addi sp, sp, -16
257+
; RV32IZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
258+
; RV32IZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
259+
; RV32IZDINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
260+
; RV32IZDINX-NEXT: mv s1, a1
261+
; RV32IZDINX-NEXT: mv s0, a0
262+
; RV32IZDINX-NEXT: mv a0, a2
263+
; RV32IZDINX-NEXT: call __extendhfsf2
264+
; RV32IZDINX-NEXT: fcvt.d.s a0, a0
265+
; RV32IZDINX-NEXT: fsgnj.d a0, s0, a0
266+
; RV32IZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
267+
; RV32IZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
268+
; RV32IZDINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
269+
; RV32IZDINX-NEXT: addi sp, sp, 16
270+
; RV32IZDINX-NEXT: ret
271+
;
272+
; RV64IZDINX-LABEL: fold_promote_d_h:
273+
; RV64IZDINX: # %bb.0:
274+
; RV64IZDINX-NEXT: addi sp, sp, -16
275+
; RV64IZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
276+
; RV64IZDINX-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
277+
; RV64IZDINX-NEXT: mv s0, a0
278+
; RV64IZDINX-NEXT: mv a0, a1
279+
; RV64IZDINX-NEXT: call __extendhfsf2
280+
; RV64IZDINX-NEXT: fcvt.d.s a0, a0
281+
; RV64IZDINX-NEXT: fsgnj.d a0, s0, a0
282+
; RV64IZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
283+
; RV64IZDINX-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
284+
; RV64IZDINX-NEXT: addi sp, sp, 16
285+
; RV64IZDINX-NEXT: ret
235286
%c = fpext half %b to double
236287
%t = call double @llvm.copysign.f64(double %a, double %c)
237288
ret double %t
@@ -335,6 +386,34 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
335386
; RV64IFDZFHMIN-NEXT: fcvt.s.h fa5, fa1
336387
; RV64IFDZFHMIN-NEXT: fsgnj.s fa0, fa0, fa5
337388
; RV64IFDZFHMIN-NEXT: ret
389+
;
390+
; RV32IZDINX-LABEL: fold_promote_f_h:
391+
; RV32IZDINX: # %bb.0:
392+
; RV32IZDINX-NEXT: addi sp, sp, -16
393+
; RV32IZDINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
394+
; RV32IZDINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
395+
; RV32IZDINX-NEXT: mv s0, a0
396+
; RV32IZDINX-NEXT: mv a0, a1
397+
; RV32IZDINX-NEXT: call __extendhfsf2
398+
; RV32IZDINX-NEXT: fsgnj.s a0, s0, a0
399+
; RV32IZDINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
400+
; RV32IZDINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
401+
; RV32IZDINX-NEXT: addi sp, sp, 16
402+
; RV32IZDINX-NEXT: ret
403+
;
404+
; RV64IZDINX-LABEL: fold_promote_f_h:
405+
; RV64IZDINX: # %bb.0:
406+
; RV64IZDINX-NEXT: addi sp, sp, -16
407+
; RV64IZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
408+
; RV64IZDINX-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
409+
; RV64IZDINX-NEXT: mv s0, a0
410+
; RV64IZDINX-NEXT: mv a0, a1
411+
; RV64IZDINX-NEXT: call __extendhfsf2
412+
; RV64IZDINX-NEXT: fsgnj.s a0, s0, a0
413+
; RV64IZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
414+
; RV64IZDINX-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
415+
; RV64IZDINX-NEXT: addi sp, sp, 16
416+
; RV64IZDINX-NEXT: ret
338417
%c = fpext half %b to float
339418
%t = call float @llvm.copysign.f32(float %a, float %c)
340419
ret float %t
@@ -413,6 +492,20 @@ define float @fold_demote_s_d(float %a, double %b) nounwind {
413492
; RV64IFDZFHMIN-NEXT: fcvt.s.d fa5, fa1
414493
; RV64IFDZFHMIN-NEXT: fsgnj.s fa0, fa0, fa5
415494
; RV64IFDZFHMIN-NEXT: ret
495+
;
496+
; RV32IZDINX-LABEL: fold_demote_s_d:
497+
; RV32IZDINX: # %bb.0:
498+
; RV32IZDINX-NEXT: mv a3, a2
499+
; RV32IZDINX-NEXT: mv a2, a1
500+
; RV32IZDINX-NEXT: fcvt.s.d a1, a2
501+
; RV32IZDINX-NEXT: fsgnj.s a0, a0, a1
502+
; RV32IZDINX-NEXT: ret
503+
;
504+
; RV64IZDINX-LABEL: fold_demote_s_d:
505+
; RV64IZDINX: # %bb.0:
506+
; RV64IZDINX-NEXT: fcvt.s.d a1, a1
507+
; RV64IZDINX-NEXT: fsgnj.s a0, a0, a1
508+
; RV64IZDINX-NEXT: ret
416509
%c = fptrunc double %b to float
417510
%t = call float @llvm.copysign.f32(float %a, float %c)
418511
ret float %t
@@ -536,6 +629,30 @@ define half @fold_demote_h_s(half %a, float %b) nounwind {
536629
; RV64IFDZFHMIN-NEXT: or a0, a1, a0
537630
; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
538631
; RV64IFDZFHMIN-NEXT: ret
632+
;
633+
; RV32IZDINX-LABEL: fold_demote_h_s:
634+
; RV32IZDINX: # %bb.0:
635+
; RV32IZDINX-NEXT: lui a2, 524288
636+
; RV32IZDINX-NEXT: and a1, a1, a2
637+
; RV32IZDINX-NEXT: srli a1, a1, 16
638+
; RV32IZDINX-NEXT: slli a0, a0, 17
639+
; RV32IZDINX-NEXT: srli a0, a0, 17
640+
; RV32IZDINX-NEXT: lui a2, 1048560
641+
; RV32IZDINX-NEXT: or a0, a0, a2
642+
; RV32IZDINX-NEXT: or a0, a0, a1
643+
; RV32IZDINX-NEXT: ret
644+
;
645+
; RV64IZDINX-LABEL: fold_demote_h_s:
646+
; RV64IZDINX: # %bb.0:
647+
; RV64IZDINX-NEXT: lui a2, 524288
648+
; RV64IZDINX-NEXT: and a1, a1, a2
649+
; RV64IZDINX-NEXT: srli a1, a1, 16
650+
; RV64IZDINX-NEXT: slli a0, a0, 49
651+
; RV64IZDINX-NEXT: srli a0, a0, 49
652+
; RV64IZDINX-NEXT: lui a2, 1048560
653+
; RV64IZDINX-NEXT: or a0, a0, a2
654+
; RV64IZDINX-NEXT: or a0, a0, a1
655+
; RV64IZDINX-NEXT: ret
539656
%c = fptrunc float %b to half
540657
%t = call half @llvm.copysign.f16(half %a, half %c)
541658
ret half %t
@@ -665,6 +782,30 @@ define half @fold_demote_h_d(half %a, double %b) nounwind {
665782
; RV64IFDZFHMIN-NEXT: or a0, a1, a0
666783
; RV64IFDZFHMIN-NEXT: fmv.h.x fa0, a0
667784
; RV64IFDZFHMIN-NEXT: ret
785+
;
786+
; RV32IZDINX-LABEL: fold_demote_h_d:
787+
; RV32IZDINX: # %bb.0:
788+
; RV32IZDINX-NEXT: lui a1, 524288
789+
; RV32IZDINX-NEXT: and a1, a2, a1
790+
; RV32IZDINX-NEXT: srli a1, a1, 16
791+
; RV32IZDINX-NEXT: slli a0, a0, 17
792+
; RV32IZDINX-NEXT: srli a0, a0, 17
793+
; RV32IZDINX-NEXT: lui a2, 1048560
794+
; RV32IZDINX-NEXT: or a0, a0, a2
795+
; RV32IZDINX-NEXT: or a0, a0, a1
796+
; RV32IZDINX-NEXT: ret
797+
;
798+
; RV64IZDINX-LABEL: fold_demote_h_d:
799+
; RV64IZDINX: # %bb.0:
800+
; RV64IZDINX-NEXT: slli a0, a0, 49
801+
; RV64IZDINX-NEXT: srli a0, a0, 49
802+
; RV64IZDINX-NEXT: srli a1, a1, 63
803+
; RV64IZDINX-NEXT: slli a1, a1, 63
804+
; RV64IZDINX-NEXT: srli a1, a1, 48
805+
; RV64IZDINX-NEXT: lui a2, 1048560
806+
; RV64IZDINX-NEXT: or a0, a0, a2
807+
; RV64IZDINX-NEXT: or a0, a0, a1
808+
; RV64IZDINX-NEXT: ret
668809
%c = fptrunc double %b to half
669810
%t = call half @llvm.copysign.f16(half %a, half %c)
670811
ret half %t

0 commit comments

Comments
 (0)