Skip to content

Commit b2ec416

Browse files
authored
[ARM] Fix NaN behaviour for MVE compare intrinsics (#116371)
The MVE intrinsics are defined as having the same behaviour as the instructions which they correspond to. In particular, the vcmpleq and vcmpltq intrinsics correspond to the VCMP instruction with the LE or LT condition. However, these instructions with these two conditions do not match the normal IEEE754 behaviour for NaNs, they return true if either operand is a NaN, instead of false. Therefore we need to generate `fcmp` IR instructions with the `ule` and `ult` conditions, instead of `ole` and `olt`. This differs from AdvSIMD, where only instructions with the EQ, GE and GT conditions are available, and the intrinsics for the others are defined by swapping the condition and operand order, so the results match the IEEE754 behaviour for NaNs.
1 parent b3e2b1a commit b2ec416

File tree

3 files changed

+20
-20
lines changed

3 files changed

+20
-20
lines changed

clang/include/clang/Basic/arm_mve.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -753,8 +753,8 @@ let params = T.Float in {
753753
defm: compare<"ne", fcmp_ne>;
754754
defm: compare<"gt", fcmp_gt>;
755755
defm: compare<"ge", fcmp_ge>;
756-
defm: compare<"lt", fcmp_lt>;
757-
defm: compare<"le", fcmp_le>;
756+
defm: compare<"lt", fcmp_ult>;
757+
defm: compare<"le", fcmp_ule>;
758758
}
759759

760760
let params = T.Signed in {

clang/include/clang/Basic/arm_mve_defs.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,8 @@ def fcmp_eq: IRBuilder<"CreateFCmpOEQ">;
116116
def fcmp_ne: IRBuilder<"CreateFCmpUNE">; // not O: it must return true on NaNs
117117
def fcmp_gt: IRBuilder<"CreateFCmpOGT">;
118118
def fcmp_ge: IRBuilder<"CreateFCmpOGE">;
119-
def fcmp_lt: IRBuilder<"CreateFCmpOLT">;
120-
def fcmp_le: IRBuilder<"CreateFCmpOLE">;
119+
def fcmp_ult: IRBuilder<"CreateFCmpULT">;
120+
def fcmp_ule: IRBuilder<"CreateFCmpULE">;
121121
def splat: CGHelperFn<"ARMMVEVectorSplat">;
122122
def select: IRBuilder<"CreateSelect">;
123123
def fneg: IRBuilder<"CreateFNeg">;

clang/test/CodeGen/arm-mve-intrinsics/compare.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2376,7 +2376,7 @@ mve_pred16_t test_vcmphiq_m_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p)
23762376

23772377
// CHECK-LABEL: @test_vcmpleq_f16(
23782378
// CHECK-NEXT: entry:
2379-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[B:%.*]]
2379+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ule <8 x half> [[A:%.*]], [[B:%.*]]
23802380
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
23812381
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
23822382
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2392,7 +2392,7 @@ mve_pred16_t test_vcmpleq_f16(float16x8_t a, float16x8_t b)
23922392

23932393
// CHECK-LABEL: @test_vcmpleq_f32(
23942394
// CHECK-NEXT: entry:
2395-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[B:%.*]]
2395+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ule <4 x float> [[A:%.*]], [[B:%.*]]
23962396
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
23972397
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
23982398
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2458,7 +2458,7 @@ mve_pred16_t test_vcmpleq_s32(int32x4_t a, int32x4_t b)
24582458
// CHECK-NEXT: entry:
24592459
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
24602460
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
2461-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
2461+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ule <8 x half> [[A:%.*]], [[DOTSPLAT]]
24622462
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
24632463
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
24642464
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2476,7 +2476,7 @@ mve_pred16_t test_vcmpleq_n_f16(float16x8_t a, float16_t b)
24762476
// CHECK-NEXT: entry:
24772477
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
24782478
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
2479-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]]
2479+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ule <4 x float> [[A:%.*]], [[DOTSPLAT]]
24802480
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
24812481
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
24822482
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2548,7 +2548,7 @@ mve_pred16_t test_vcmpleq_n_s32(int32x4_t a, int32_t b)
25482548
// CHECK-NEXT: entry:
25492549
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
25502550
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
2551-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[B:%.*]]
2551+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ule <8 x half> [[A:%.*]], [[B:%.*]]
25522552
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
25532553
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
25542554
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -2567,7 +2567,7 @@ mve_pred16_t test_vcmpleq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
25672567
// CHECK-NEXT: entry:
25682568
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
25692569
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
2570-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[B:%.*]]
2570+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ule <4 x float> [[A:%.*]], [[B:%.*]]
25712571
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
25722572
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
25732573
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -2645,7 +2645,7 @@ mve_pred16_t test_vcmpleq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
26452645
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
26462646
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
26472647
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
2648-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <8 x half> [[A:%.*]], [[DOTSPLAT]]
2648+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ule <8 x half> [[A:%.*]], [[DOTSPLAT]]
26492649
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
26502650
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
26512651
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -2666,7 +2666,7 @@ mve_pred16_t test_vcmpleq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
26662666
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
26672667
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
26682668
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
2669-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ole <4 x float> [[A:%.*]], [[DOTSPLAT]]
2669+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ule <4 x float> [[A:%.*]], [[DOTSPLAT]]
26702670
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
26712671
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
26722672
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -2746,7 +2746,7 @@ mve_pred16_t test_vcmpleq_m_n_s32(int32x4_t a, int32_t b, mve_pred16_t p)
27462746

27472747
// CHECK-LABEL: @test_vcmpltq_f16(
27482748
// CHECK-NEXT: entry:
2749-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[B:%.*]]
2749+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ult <8 x half> [[A:%.*]], [[B:%.*]]
27502750
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
27512751
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
27522752
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2762,7 +2762,7 @@ mve_pred16_t test_vcmpltq_f16(float16x8_t a, float16x8_t b)
27622762

27632763
// CHECK-LABEL: @test_vcmpltq_f32(
27642764
// CHECK-NEXT: entry:
2765-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]]
2765+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ult <4 x float> [[A:%.*]], [[B:%.*]]
27662766
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
27672767
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
27682768
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2828,7 +2828,7 @@ mve_pred16_t test_vcmpltq_s32(int32x4_t a, int32x4_t b)
28282828
// CHECK-NEXT: entry:
28292829
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
28302830
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
2831-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
2831+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ult <8 x half> [[A:%.*]], [[DOTSPLAT]]
28322832
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]])
28332833
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
28342834
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2846,7 +2846,7 @@ mve_pred16_t test_vcmpltq_n_f16(float16x8_t a, float16_t b)
28462846
// CHECK-NEXT: entry:
28472847
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
28482848
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
2849-
// CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]]
2849+
// CHECK-NEXT: [[TMP0:%.*]] = fcmp ult <4 x float> [[A:%.*]], [[DOTSPLAT]]
28502850
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP0]])
28512851
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
28522852
// CHECK-NEXT: ret i16 [[TMP2]]
@@ -2918,7 +2918,7 @@ mve_pred16_t test_vcmpltq_n_s32(int32x4_t a, int32_t b)
29182918
// CHECK-NEXT: entry:
29192919
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
29202920
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
2921-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[B:%.*]]
2921+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ult <8 x half> [[A:%.*]], [[B:%.*]]
29222922
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
29232923
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
29242924
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -2937,7 +2937,7 @@ mve_pred16_t test_vcmpltq_m_f16(float16x8_t a, float16x8_t b, mve_pred16_t p)
29372937
// CHECK-NEXT: entry:
29382938
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
29392939
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
2940-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[B:%.*]]
2940+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ult <4 x float> [[A:%.*]], [[B:%.*]]
29412941
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
29422942
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
29432943
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -3015,7 +3015,7 @@ mve_pred16_t test_vcmpltq_m_s32(int32x4_t a, int32x4_t b, mve_pred16_t p)
30153015
// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
30163016
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x half> poison, half [[B:%.*]], i64 0
30173017
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> poison, <8 x i32> zeroinitializer
3018-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <8 x half> [[A:%.*]], [[DOTSPLAT]]
3018+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ult <8 x half> [[A:%.*]], [[DOTSPLAT]]
30193019
// CHECK-NEXT: [[TMP3:%.*]] = and <8 x i1> [[TMP1]], [[TMP2]]
30203020
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP3]])
30213021
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
@@ -3036,7 +3036,7 @@ mve_pred16_t test_vcmpltq_m_n_f16(float16x8_t a, float16_t b, mve_pred16_t p)
30363036
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
30373037
// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
30383038
// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
3039-
// CHECK-NEXT: [[TMP2:%.*]] = fcmp olt <4 x float> [[A:%.*]], [[DOTSPLAT]]
3039+
// CHECK-NEXT: [[TMP2:%.*]] = fcmp ult <4 x float> [[A:%.*]], [[DOTSPLAT]]
30403040
// CHECK-NEXT: [[TMP3:%.*]] = and <4 x i1> [[TMP1]], [[TMP2]]
30413041
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
30423042
// CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16

0 commit comments

Comments
 (0)