Skip to content

Commit e5edd64

Browse files
committed
[X86] Use a shorter sequence to implement FLT_ROUNDS
This code needs to map from the FPCW 2-bit encoding for rounding mode to the 2-bit encoding defined for FLT_ROUNDS. The previous implementation did some clever swapping of bits and adding 1 modulo 4 to do the mapping. This patch instead uses an 8-bit immediate as a lookup table of four 2-bit values. Then we use the 2-bit FPCW encoding to index the lookup table by using a right shift and an AND. This requires extracting the 2-bit value from FPCW and multiplying it by 2 to make it usable as a shift amount, but it still results in less code. Differential Revision: https://reviews.llvm.org/D73599
1 parent 6212987 commit e5edd64

File tree

2 files changed

+23
-27
lines changed

2 files changed

+23
-27
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25427,8 +25427,11 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2542725427
2 Round to +inf
2542825428
3 Round to -inf
2542925429

25430-
To perform the conversion, we do:
25431-
(((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
25430+
To perform the conversion, we use a packed lookup table of the four 2-bit
25431+
values that we can index by FPSR[11:10]
25432+
0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
25433+
25434+
(0x2d >> ((FPSR & 0xc00) >> 9)) & 3
2543225435
*/
2543325436

2543425437
MachineFunction &MF = DAG.getMachineFunction();
@@ -25456,24 +25459,19 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2545625459
SDValue CWD =
2545725460
DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MachinePointerInfo());
2545825461

25459-
// Transform as necessary
25460-
SDValue CWD1 =
25461-
DAG.getNode(ISD::SRL, DL, MVT::i16,
25462-
DAG.getNode(ISD::AND, DL, MVT::i16,
25463-
CWD, DAG.getConstant(0x800, DL, MVT::i16)),
25464-
DAG.getConstant(11, DL, MVT::i8));
25465-
SDValue CWD2 =
25462+
// Mask and turn the control bits into a shift for the lookup table.
25463+
SDValue Shift =
2546625464
DAG.getNode(ISD::SRL, DL, MVT::i16,
2546725465
DAG.getNode(ISD::AND, DL, MVT::i16,
25468-
CWD, DAG.getConstant(0x400, DL, MVT::i16)),
25466+
CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
2546925467
DAG.getConstant(9, DL, MVT::i8));
25468+
Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);
2547025469

25470+
SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
2547125471
SDValue RetVal =
25472-
DAG.getNode(ISD::AND, DL, MVT::i16,
25473-
DAG.getNode(ISD::ADD, DL, MVT::i16,
25474-
DAG.getNode(ISD::OR, DL, MVT::i16, CWD1, CWD2),
25475-
DAG.getConstant(1, DL, MVT::i16)),
25476-
DAG.getConstant(3, DL, MVT::i16));
25472+
DAG.getNode(ISD::AND, DL, MVT::i32,
25473+
DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
25474+
DAG.getConstant(3, DL, MVT::i32));
2547725475

2547825476
return DAG.getZExtOrTrunc(RetVal, DL, VT);
2547925477
}

llvm/test/CodeGen/X86/flt-rounds.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,25 @@ define i32 @test_flt_rounds() nounwind {
1010
; X86: # %bb.0:
1111
; X86-NEXT: subl $12, %esp
1212
; X86-NEXT: fnstcw (%esp)
13-
; X86-NEXT: movl (%esp), %eax
14-
; X86-NEXT: movl %eax, %ecx
13+
; X86-NEXT: movzwl (%esp), %ecx
1514
; X86-NEXT: shrl $9, %ecx
16-
; X86-NEXT: andl $2, %ecx
17-
; X86-NEXT: shrl $11, %eax
18-
; X86-NEXT: andl $1, %eax
19-
; X86-NEXT: leal 1(%eax,%ecx), %eax
15+
; X86-NEXT: andb $6, %cl
16+
; X86-NEXT: movl $45, %eax
17+
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
18+
; X86-NEXT: shrl %cl, %eax
2019
; X86-NEXT: andl $3, %eax
2120
; X86-NEXT: addl $12, %esp
2221
; X86-NEXT: retl
2322
;
2423
; X64-LABEL: test_flt_rounds:
2524
; X64: # %bb.0:
2625
; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp)
27-
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
28-
; X64-NEXT: movl %eax, %ecx
26+
; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
2927
; X64-NEXT: shrl $9, %ecx
30-
; X64-NEXT: andl $2, %ecx
31-
; X64-NEXT: shrl $11, %eax
32-
; X64-NEXT: andl $1, %eax
33-
; X64-NEXT: leal 1(%rax,%rcx), %eax
28+
; X64-NEXT: andb $6, %cl
29+
; X64-NEXT: movl $45, %eax
30+
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
31+
; X64-NEXT: shrl %cl, %eax
3432
; X64-NEXT: andl $3, %eax
3533
; X64-NEXT: retq
3634
%1 = call i32 @llvm.flt.rounds()

0 commit comments

Comments
 (0)