Skip to content

Commit b2524eb

Browse files
committed
[HIP] Fix HIP rounding math intrinsics
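The __ocml_*_rte_f32 and __ocml_*_rte_f64 functions are not available if OCML_BASIC_ROUNDED_OPERATIONS is not defined. This change therefore moves the round-to-nearest (_rn) intrinsics that call them inside the OCML_BASIC_ROUNDED_OPERATIONS guard and adds fallback _rn definitions for the case where the macro is not defined.

Reviewed By: b-sumner, yaxunl
Fixes: SWDEV-257235
Differential Revision: https://reviews.llvm.org/D89966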
1 parent e24537d commit b2524eb

File tree

1 file changed

+79
-50
lines changed

1 file changed

+79
-50
lines changed

clang/lib/Headers/__clang_hip_math.h

Lines changed: 79 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -547,102 +547,117 @@ float __expf(float __x) { return __ocml_native_exp_f32(__x); }
547547
// Single-precision add intrinsics (__fadd_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards both operands to
// the __ocml_add_*_f32 call named in its body (rtn/rte/rtp/rtz).
// Diff rendering: gutter rows ending in '-' precede lines the commit removes
// (here the inner #endif/#if pair that used to leave __fadd_rn unguarded);
// rows ending in '+' precede lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
548548
__DEVICE__
549549
float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
550-
#endif
551550
__DEVICE__
552551
float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
553-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
554552
__DEVICE__
555553
float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
556-
557554
__DEVICE__
558555
float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
556+
#else
557+
// Fallback branch: only __fadd_rn is defined, using the built-in + operator
// instead of an OCML call.
__DEVICE__
558+
float __fadd_rn(float __x, float __y) { return __x + __y; }
559+
#endif
559560

561+
// Single-precision divide intrinsics (__fdiv_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_div_*_f32 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
560562
__DEVICE__
561563
float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
562-
#endif
563564
__DEVICE__
564565
float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
565-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
566566
__DEVICE__
567567
float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
568-
569568
__DEVICE__
570569
float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
570+
#else
571+
// Fallback branch: only __fdiv_rn is defined, using the built-in / operator.
__DEVICE__
572+
float __fdiv_rn(float __x, float __y) { return __x / __y; }
571573
#endif
574+
572575
__DEVICE__
573576
// Divides __x by __y with the built-in / operator and returns the quotient.
// Defined outside the OCML_BASIC_ROUNDED_OPERATIONS guard, so it does not
// depend on that macro.
float __fdividef(float __x, float __y) {
  const float __quotient = __x / __y;
  return __quotient;
}
577+
574578
// Single-precision fused multiply-add intrinsics (__fmaf_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards its three operands
// to the __ocml_fma_*_f32 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
575579
__DEVICE__
576580
float __fmaf_rd(float __x, float __y, float __z) {
577581
return __ocml_fma_rtn_f32(__x, __y, __z);
578582
}
579-
#endif
580583
__DEVICE__
581584
float __fmaf_rn(float __x, float __y, float __z) {
582585
return __ocml_fma_rte_f32(__x, __y, __z);
583586
}
584-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
585587
__DEVICE__
586588
float __fmaf_ru(float __x, float __y, float __z) {
587589
return __ocml_fma_rtp_f32(__x, __y, __z);
588590
}
589-
590591
__DEVICE__
591592
float __fmaf_rz(float __x, float __y, float __z) {
592593
return __ocml_fma_rtz_f32(__x, __y, __z);
593594
}
595+
#else
596+
// Fallback branch: only __fmaf_rn is defined; it calls __ocml_fma_f32, the
// OCML fma entry point without a rounding suffix.
__DEVICE__
597+
float __fmaf_rn(float __x, float __y, float __z) {
598+
return __ocml_fma_f32(__x, __y, __z);
599+
}
600+
#endif
594601

602+
// Single-precision multiply intrinsics (__fmul_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_mul_*_f32 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes; rows ending in '+'
// mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
595603
__DEVICE__
596604
float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
597-
#endif
598605
__DEVICE__
599606
float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
600-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
601607
__DEVICE__
602608
float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
603-
604609
__DEVICE__
605610
float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
606-
611+
#else
// Fallback branch. The diff interleaves two lines under the shared __DEVICE__
// row: the removed old __frcp_rd (gutter "608-") and the added __fmul_rn
// fallback (gutter "613+"), which uses the built-in * operator.
607612
__DEVICE__
608-
float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
613+
float __fmul_rn(float __x, float __y) { return __x * __y; }
609614
#endif
610-
// Pre-change __frcp_rn; both rows carry a '-' gutter marker, i.e. the commit
// removes this definition. It returned __llvm_amdgcn_rcp_f32(__x) directly.
__DEVICE__
611-
float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
615+
612616
// Single-precision reciprocal intrinsics (__frcp_rd/_rn/_ru/_rz).
// Removed rows ('-' gutter): the old _ru and _rz bodies, which both called
// __llvm_amdgcn_rcp_f32 regardless of suffix.
// Added rows ('+' gutter): each variant now calls the __ocml_div_*_f32
// function matching its suffix with a 1.0f numerator.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
613617
__DEVICE__
614-
float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
615-
618+
float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
619+
__DEVICE__
620+
float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
616621
__DEVICE__
617-
float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); }
622+
float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
623+
__DEVICE__
624+
float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
625+
#else
626+
// Fallback branch: only __frcp_rn is defined, computed as 1.0f / __x with the
// built-in / operator.
__DEVICE__
627+
float __frcp_rn(float __x) { return 1.0f / __x; }
618628
#endif
629+
619630
__DEVICE__
620631
// Passes __x to the __llvm_amdgcn_rsq_f32 builtin and returns its result
// unchanged. Defined outside the OCML_BASIC_ROUNDED_OPERATIONS guard.
float __frsqrt_rn(float __x) {
  const float __rsq = __llvm_amdgcn_rsq_f32(__x);
  return __rsq;
}
632+
621633
// Single-precision square-root intrinsics (__fsqrt_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_sqrt_*_f32 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
622634
__DEVICE__
623635
float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
624-
#endif
625636
__DEVICE__
626637
float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
627-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
628638
__DEVICE__
629639
float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
630-
631640
__DEVICE__
632641
float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
642+
#else
643+
// Fallback branch: only __fsqrt_rn is defined; it calls
// __ocml_native_sqrt_f32.
// NOTE(review): the double-precision fallback __dsqrt_rn calls
// __ocml_sqrt_f64 (no "native" in the name); confirm that using the native
// variant here is intended for an _rn intrinsic.
__DEVICE__
644+
float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
645+
#endif
633646

647+
// Single-precision subtract intrinsics (__fsub_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_sub_*_f32 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
634648
__DEVICE__
635649
float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
636-
#endif
637650
__DEVICE__
638651
float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
639-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
640652
__DEVICE__
641653
float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
642-
643654
__DEVICE__
644655
float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
656+
#else
657+
// Fallback branch: only __fsub_rn is defined, using the built-in - operator.
__DEVICE__
658+
float __fsub_rn(float __x, float __y) { return __x - __y; }
645659
#endif
660+
646661
__DEVICE__
647662
// Passes __x to __ocml_native_log10_f32 and returns its result unchanged.
float __log10f(float __x) {
  const float __log10_value = __ocml_native_log10_f32(__x);
  return __log10_value;
}
648663

@@ -1071,125 +1086,139 @@ __DEVICE__
10711086
// Double-precision add intrinsics (__dadd_rd/_rn/_ru/_rz). Each forwards to
// the __ocml_add_*_f64 call named in its body (rtn/rte/rtp/rtz).
// The conditional that the #else below belongs to opens above this diff hunk
// and is not visible here (presumably the same
// OCML_BASIC_ROUNDED_OPERATIONS guard used by the sibling families).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
double __dadd_rd(double __x, double __y) {
10721087
return __ocml_add_rtn_f64(__x, __y);
10731088
}
1074-
#endif
10751089
__DEVICE__
10761090
double __dadd_rn(double __x, double __y) {
10771091
return __ocml_add_rte_f64(__x, __y);
10781092
}
1079-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
10801093
__DEVICE__
10811094
double __dadd_ru(double __x, double __y) {
10821095
return __ocml_add_rtp_f64(__x, __y);
10831096
}
1084-
10851097
__DEVICE__
10861098
double __dadd_rz(double __x, double __y) {
10871099
return __ocml_add_rtz_f64(__x, __y);
10881100
}
1101+
#else
1102+
// Fallback branch: only __dadd_rn is defined, using the built-in + operator.
__DEVICE__
1103+
double __dadd_rn(double __x, double __y) { return __x + __y; }
1104+
#endif
10891105

1106+
// Double-precision divide intrinsics (__ddiv_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_div_*_f64 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
10901107
__DEVICE__
10911108
double __ddiv_rd(double __x, double __y) {
10921109
return __ocml_div_rtn_f64(__x, __y);
10931110
}
1094-
#endif
10951111
__DEVICE__
10961112
double __ddiv_rn(double __x, double __y) {
10971113
return __ocml_div_rte_f64(__x, __y);
10981114
}
1099-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11001115
__DEVICE__
11011116
double __ddiv_ru(double __x, double __y) {
11021117
return __ocml_div_rtp_f64(__x, __y);
11031118
}
1104-
11051119
__DEVICE__
11061120
double __ddiv_rz(double __x, double __y) {
11071121
return __ocml_div_rtz_f64(__x, __y);
11081122
}
1123+
#else
1124+
// Fallback branch: only __ddiv_rn is defined, using the built-in / operator.
__DEVICE__
1125+
double __ddiv_rn(double __x, double __y) { return __x / __y; }
1126+
#endif
11091127

1128+
// Double-precision multiply intrinsics (__dmul_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_mul_*_f64 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes; rows ending in '+'
// mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11101129
__DEVICE__
11111130
double __dmul_rd(double __x, double __y) {
11121131
return __ocml_mul_rtn_f64(__x, __y);
11131132
}
1114-
#endif
11151133
__DEVICE__
11161134
double __dmul_rn(double __x, double __y) {
11171135
return __ocml_mul_rte_f64(__x, __y);
11181136
}
1119-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11201137
__DEVICE__
11211138
double __dmul_ru(double __x, double __y) {
11221139
return __ocml_mul_rtp_f64(__x, __y);
11231140
}
1124-
11251141
__DEVICE__
11261142
double __dmul_rz(double __x, double __y) {
11271143
return __ocml_mul_rtz_f64(__x, __y);
11281144
}
1129-
1145+
#else
// Fallback branch. The diff interleaves two lines under the shared __DEVICE__
// row: the removed old __drcp_rd (gutter "1131-") and the added __dmul_rn
// fallback (gutter "1147+"), which uses the built-in * operator.
11301146
__DEVICE__
1131-
double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1147+
double __dmul_rn(double __x, double __y) { return __x * __y; }
11321148
#endif
1133-
// Pre-change __drcp_rn; both rows carry a '-' gutter marker, i.e. the commit
// removes this definition. It returned __llvm_amdgcn_rcp_f64(__x) directly.
__DEVICE__
1134-
double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1149+
11351150
// Double-precision reciprocal intrinsics (__drcp_rd/_rn/_ru/_rz).
// Removed rows ('-' gutter): the old _ru and _rz bodies, which both called
// __llvm_amdgcn_rcp_f64 regardless of suffix.
// Added rows ('+' gutter): each variant now calls the __ocml_div_*_f64
// function matching its suffix with a 1.0 numerator.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11361151
__DEVICE__
1137-
double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1138-
1152+
double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
1153+
__DEVICE__
1154+
double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
1155+
__DEVICE__
1156+
double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1157+
__DEVICE__
1158+
double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1159+
#else
// Fallback branch. Under the shared __DEVICE__ row the diff shows the removed
// old __drcp_rz (gutter "1140-") followed by the added __drcp_rn fallback
// (gutter "1161+"), which computes 1.0 / __x with the built-in / operator.
11391160
__DEVICE__
1140-
double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); }
1161+
double __drcp_rn(double __x) { return 1.0 / __x; }
1162+
#endif
11411163

1164+
// Double-precision square-root intrinsics (__dsqrt_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_sqrt_*_f64 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11421165
__DEVICE__
11431166
double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1144-
#endif
11451167
__DEVICE__
11461168
double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1147-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11481169
__DEVICE__
11491170
double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1150-
11511171
__DEVICE__
11521172
double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1173+
#else
1174+
// Fallback branch: only __dsqrt_rn is defined; it calls __ocml_sqrt_f64, the
// OCML sqrt entry point without a rounding suffix.
__DEVICE__
1175+
double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
1176+
#endif
11531177

1178+
// Double-precision subtract intrinsics (__dsub_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards to the
// __ocml_sub_*_f64 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11541179
__DEVICE__
11551180
double __dsub_rd(double __x, double __y) {
11561181
return __ocml_sub_rtn_f64(__x, __y);
11571182
}
1158-
#endif
11591183
__DEVICE__
11601184
double __dsub_rn(double __x, double __y) {
11611185
return __ocml_sub_rte_f64(__x, __y);
11621186
}
1163-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11641187
__DEVICE__
11651188
double __dsub_ru(double __x, double __y) {
11661189
return __ocml_sub_rtp_f64(__x, __y);
11671190
}
1168-
11691191
__DEVICE__
11701192
double __dsub_rz(double __x, double __y) {
11711193
return __ocml_sub_rtz_f64(__x, __y);
11721194
}
1195+
#else
1196+
// Fallback branch: only __dsub_rn is defined, using the built-in - operator.
__DEVICE__
1197+
double __dsub_rn(double __x, double __y) { return __x - __y; }
1198+
#endif
11731199

1200+
// Double-precision fused multiply-add intrinsics (__fma_rd/_rn/_ru/_rz).
// Under OCML_BASIC_ROUNDED_OPERATIONS each variant forwards its three operands
// to the __ocml_fma_*_f64 call named in its body (rtn/rte/rtp/rtz).
// Gutter rows ending in '-' mark lines the commit removes (the inner
// #endif/#if pair); rows ending in '+' mark lines it adds.
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11741201
__DEVICE__
11751202
double __fma_rd(double __x, double __y, double __z) {
11761203
return __ocml_fma_rtn_f64(__x, __y, __z);
11771204
}
1178-
#endif
11791205
__DEVICE__
11801206
double __fma_rn(double __x, double __y, double __z) {
11811207
return __ocml_fma_rte_f64(__x, __y, __z);
11821208
}
1183-
#if defined OCML_BASIC_ROUNDED_OPERATIONS
11841209
__DEVICE__
11851210
double __fma_ru(double __x, double __y, double __z) {
11861211
return __ocml_fma_rtp_f64(__x, __y, __z);
11871212
}
1188-
11891213
__DEVICE__
11901214
double __fma_rz(double __x, double __y, double __z) {
11911215
return __ocml_fma_rtz_f64(__x, __y, __z);
11921216
}
1217+
#else
1218+
// Fallback branch: only __fma_rn is defined; it calls __ocml_fma_f64, the
// OCML fma entry point without a rounding suffix.
__DEVICE__
1219+
double __fma_rn(double __x, double __y, double __z) {
1220+
return __ocml_fma_f64(__x, __y, __z);
1221+
}
11931222
#endif
11941223
// END INTRINSICS
11951224
// END DOUBLE

0 commit comments

Comments
 (0)