@@ -547,102 +547,117 @@ float __expf(float __x) { return __ocml_native_exp_f32(__x); }
547
547
#if defined OCML_BASIC_ROUNDED_OPERATIONS
548
548
__DEVICE__
549
549
float __fadd_rd (float __x, float __y) { return __ocml_add_rtn_f32 (__x, __y); }
550
- #endif
551
550
__DEVICE__
552
551
float __fadd_rn (float __x, float __y) { return __ocml_add_rte_f32 (__x, __y); }
553
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
554
552
__DEVICE__
555
553
float __fadd_ru (float __x, float __y) { return __ocml_add_rtp_f32 (__x, __y); }
556
-
557
554
__DEVICE__
558
555
float __fadd_rz (float __x, float __y) { return __ocml_add_rtz_f32 (__x, __y); }
556
+ #else
557
+ __DEVICE__
558
+ float __fadd_rn (float __x, float __y) { return __x + __y; }
559
+ #endif
559
560
561
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
560
562
__DEVICE__
561
563
float __fdiv_rd (float __x, float __y) { return __ocml_div_rtn_f32 (__x, __y); }
562
- #endif
563
564
__DEVICE__
564
565
float __fdiv_rn (float __x, float __y) { return __ocml_div_rte_f32 (__x, __y); }
565
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
566
566
__DEVICE__
567
567
float __fdiv_ru (float __x, float __y) { return __ocml_div_rtp_f32 (__x, __y); }
568
-
569
568
__DEVICE__
570
569
float __fdiv_rz (float __x, float __y) { return __ocml_div_rtz_f32 (__x, __y); }
570
+ #else
571
+ __DEVICE__
572
+ float __fdiv_rn (float __x, float __y) { return __x / __y; }
571
573
#endif
574
+
572
575
__DEVICE__
573
576
float __fdividef (float __x, float __y) { return __x / __y; }
577
+
574
578
#if defined OCML_BASIC_ROUNDED_OPERATIONS
575
579
__DEVICE__
576
580
float __fmaf_rd (float __x, float __y, float __z) {
577
581
return __ocml_fma_rtn_f32 (__x, __y, __z);
578
582
}
579
- #endif
580
583
__DEVICE__
581
584
float __fmaf_rn (float __x, float __y, float __z) {
582
585
return __ocml_fma_rte_f32 (__x, __y, __z);
583
586
}
584
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
585
587
__DEVICE__
586
588
float __fmaf_ru (float __x, float __y, float __z) {
587
589
return __ocml_fma_rtp_f32 (__x, __y, __z);
588
590
}
589
-
590
591
__DEVICE__
591
592
float __fmaf_rz (float __x, float __y, float __z) {
592
593
return __ocml_fma_rtz_f32 (__x, __y, __z);
593
594
}
595
+ #else
596
+ __DEVICE__
597
+ float __fmaf_rn (float __x, float __y, float __z) {
598
+ return __ocml_fma_f32 (__x, __y, __z);
599
+ }
600
+ #endif
594
601
602
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
595
603
__DEVICE__
596
604
float __fmul_rd (float __x, float __y) { return __ocml_mul_rtn_f32 (__x, __y); }
597
- #endif
598
605
__DEVICE__
599
606
float __fmul_rn (float __x, float __y) { return __ocml_mul_rte_f32 (__x, __y); }
600
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
601
607
__DEVICE__
602
608
float __fmul_ru (float __x, float __y) { return __ocml_mul_rtp_f32 (__x, __y); }
603
-
604
609
__DEVICE__
605
610
float __fmul_rz (float __x, float __y) { return __ocml_mul_rtz_f32 (__x, __y); }
606
-
611
+ #else
607
612
__DEVICE__
608
- float __frcp_rd (float __x) { return __llvm_amdgcn_rcp_f32 ( __x) ; }
613
+ float __fmul_rn (float __x, float __y ) { return __x * __y ; }
609
614
#endif
610
- __DEVICE__
611
- float __frcp_rn (float __x) { return __llvm_amdgcn_rcp_f32 (__x); }
615
+
612
616
#if defined OCML_BASIC_ROUNDED_OPERATIONS
613
617
__DEVICE__
614
- float __frcp_ru (float __x) { return __llvm_amdgcn_rcp_f32 (__x); }
615
-
618
+ float __frcp_rd (float __x) { return __ocml_div_rtn_f32 (1.0f, __x); }
619
+ __DEVICE__
620
+ float __frcp_rn (float __x) { return __ocml_div_rte_f32 (1.0f, __x); }
616
621
__DEVICE__
617
- float __frcp_rz (float __x) { return __llvm_amdgcn_rcp_f32 (__x); }
622
+ float __frcp_ru (float __x) { return __ocml_div_rtp_f32 (1.0f, __x); }
623
+ __DEVICE__
624
+ float __frcp_rz (float __x) { return __ocml_div_rtz_f32 (1.0f, __x); }
625
+ #else
626
+ __DEVICE__
627
+ float __frcp_rn (float __x) { return 1.0f / __x; }
618
628
#endif
629
+
619
630
__DEVICE__
620
631
float __frsqrt_rn (float __x) { return __llvm_amdgcn_rsq_f32 (__x); }
632
+
621
633
#if defined OCML_BASIC_ROUNDED_OPERATIONS
622
634
__DEVICE__
623
635
float __fsqrt_rd (float __x) { return __ocml_sqrt_rtn_f32 (__x); }
624
- #endif
625
636
__DEVICE__
626
637
float __fsqrt_rn (float __x) { return __ocml_sqrt_rte_f32 (__x); }
627
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
628
638
__DEVICE__
629
639
float __fsqrt_ru (float __x) { return __ocml_sqrt_rtp_f32 (__x); }
630
-
631
640
__DEVICE__
632
641
float __fsqrt_rz (float __x) { return __ocml_sqrt_rtz_f32 (__x); }
642
+ #else
643
+ __DEVICE__
644
+ float __fsqrt_rn (float __x) { return __ocml_native_sqrt_f32 (__x); }
645
+ #endif
633
646
647
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
634
648
__DEVICE__
635
649
float __fsub_rd (float __x, float __y) { return __ocml_sub_rtn_f32 (__x, __y); }
636
- #endif
637
650
__DEVICE__
638
651
float __fsub_rn (float __x, float __y) { return __ocml_sub_rte_f32 (__x, __y); }
639
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
640
652
__DEVICE__
641
653
float __fsub_ru (float __x, float __y) { return __ocml_sub_rtp_f32 (__x, __y); }
642
-
643
654
__DEVICE__
644
655
float __fsub_rz (float __x, float __y) { return __ocml_sub_rtz_f32 (__x, __y); }
656
+ #else
657
+ __DEVICE__
658
+ float __fsub_rn (float __x, float __y) { return __x - __y; }
645
659
#endif
660
+
646
661
__DEVICE__
647
662
float __log10f (float __x) { return __ocml_native_log10_f32 (__x); }
648
663
@@ -1071,125 +1086,139 @@ __DEVICE__
1071
1086
double __dadd_rd (double __x, double __y) {
1072
1087
return __ocml_add_rtn_f64 (__x, __y);
1073
1088
}
1074
- #endif
1075
1089
__DEVICE__
1076
1090
double __dadd_rn (double __x, double __y) {
1077
1091
return __ocml_add_rte_f64 (__x, __y);
1078
1092
}
1079
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
1080
1093
__DEVICE__
1081
1094
double __dadd_ru (double __x, double __y) {
1082
1095
return __ocml_add_rtp_f64 (__x, __y);
1083
1096
}
1084
-
1085
1097
__DEVICE__
1086
1098
double __dadd_rz (double __x, double __y) {
1087
1099
return __ocml_add_rtz_f64 (__x, __y);
1088
1100
}
1101
+ #else
1102
+ __DEVICE__
1103
+ double __dadd_rn (double __x, double __y) { return __x + __y; }
1104
+ #endif
1089
1105
1106
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
1090
1107
__DEVICE__
1091
1108
double __ddiv_rd (double __x, double __y) {
1092
1109
return __ocml_div_rtn_f64 (__x, __y);
1093
1110
}
1094
- #endif
1095
1111
__DEVICE__
1096
1112
double __ddiv_rn (double __x, double __y) {
1097
1113
return __ocml_div_rte_f64 (__x, __y);
1098
1114
}
1099
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
1100
1115
__DEVICE__
1101
1116
double __ddiv_ru (double __x, double __y) {
1102
1117
return __ocml_div_rtp_f64 (__x, __y);
1103
1118
}
1104
-
1105
1119
__DEVICE__
1106
1120
double __ddiv_rz (double __x, double __y) {
1107
1121
return __ocml_div_rtz_f64 (__x, __y);
1108
1122
}
1123
+ #else
1124
+ __DEVICE__
1125
+ double __ddiv_rn (double __x, double __y) { return __x / __y; }
1126
+ #endif
1109
1127
1128
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
1110
1129
__DEVICE__
1111
1130
double __dmul_rd (double __x, double __y) {
1112
1131
return __ocml_mul_rtn_f64 (__x, __y);
1113
1132
}
1114
- #endif
1115
1133
__DEVICE__
1116
1134
double __dmul_rn (double __x, double __y) {
1117
1135
return __ocml_mul_rte_f64 (__x, __y);
1118
1136
}
1119
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
1120
1137
__DEVICE__
1121
1138
double __dmul_ru (double __x, double __y) {
1122
1139
return __ocml_mul_rtp_f64 (__x, __y);
1123
1140
}
1124
-
1125
1141
__DEVICE__
1126
1142
double __dmul_rz (double __x, double __y) {
1127
1143
return __ocml_mul_rtz_f64 (__x, __y);
1128
1144
}
1129
-
1145
+ #else
1130
1146
__DEVICE__
1131
- double __drcp_rd (double __x) { return __llvm_amdgcn_rcp_f64 ( __x) ; }
1147
+ double __dmul_rn (double __x, double __y ) { return __x * __y ; }
1132
1148
#endif
1133
- __DEVICE__
1134
- double __drcp_rn (double __x) { return __llvm_amdgcn_rcp_f64 (__x); }
1149
+
1135
1150
#if defined OCML_BASIC_ROUNDED_OPERATIONS
1136
1151
__DEVICE__
1137
- double __drcp_ru (double __x) { return __llvm_amdgcn_rcp_f64 (__x); }
1138
-
1152
+ double __drcp_rd (double __x) { return __ocml_div_rtn_f64 (1.0 , __x); }
1153
+ __DEVICE__
1154
+ double __drcp_rn (double __x) { return __ocml_div_rte_f64 (1.0 , __x); }
1155
+ __DEVICE__
1156
+ double __drcp_ru (double __x) { return __ocml_div_rtp_f64 (1.0 , __x); }
1157
+ __DEVICE__
1158
+ double __drcp_rz (double __x) { return __ocml_div_rtz_f64 (1.0 , __x); }
1159
+ #else
1139
1160
__DEVICE__
1140
- double __drcp_rz (double __x) { return __llvm_amdgcn_rcp_f64 (__x); }
1161
+ double __drcp_rn (double __x) { return 1.0 / __x; }
1162
+ #endif
1141
1163
1164
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
1142
1165
__DEVICE__
1143
1166
double __dsqrt_rd (double __x) { return __ocml_sqrt_rtn_f64 (__x); }
1144
- #endif
1145
1167
__DEVICE__
1146
1168
double __dsqrt_rn (double __x) { return __ocml_sqrt_rte_f64 (__x); }
1147
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
1148
1169
__DEVICE__
1149
1170
double __dsqrt_ru (double __x) { return __ocml_sqrt_rtp_f64 (__x); }
1150
-
1151
1171
__DEVICE__
1152
1172
double __dsqrt_rz (double __x) { return __ocml_sqrt_rtz_f64 (__x); }
1173
+ #else
1174
+ __DEVICE__
1175
+ double __dsqrt_rn (double __x) { return __ocml_sqrt_f64 (__x); }
1176
+ #endif
1153
1177
1178
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
1154
1179
__DEVICE__
1155
1180
double __dsub_rd (double __x, double __y) {
1156
1181
return __ocml_sub_rtn_f64 (__x, __y);
1157
1182
}
1158
- #endif
1159
1183
__DEVICE__
1160
1184
double __dsub_rn (double __x, double __y) {
1161
1185
return __ocml_sub_rte_f64 (__x, __y);
1162
1186
}
1163
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
1164
1187
__DEVICE__
1165
1188
double __dsub_ru (double __x, double __y) {
1166
1189
return __ocml_sub_rtp_f64 (__x, __y);
1167
1190
}
1168
-
1169
1191
__DEVICE__
1170
1192
double __dsub_rz (double __x, double __y) {
1171
1193
return __ocml_sub_rtz_f64 (__x, __y);
1172
1194
}
1195
+ #else
1196
+ __DEVICE__
1197
+ double __dsub_rn (double __x, double __y) { return __x - __y; }
1198
+ #endif
1173
1199
1200
+ #if defined OCML_BASIC_ROUNDED_OPERATIONS
1174
1201
__DEVICE__
1175
1202
double __fma_rd (double __x, double __y, double __z) {
1176
1203
return __ocml_fma_rtn_f64 (__x, __y, __z);
1177
1204
}
1178
- #endif
1179
1205
__DEVICE__
1180
1206
double __fma_rn (double __x, double __y, double __z) {
1181
1207
return __ocml_fma_rte_f64 (__x, __y, __z);
1182
1208
}
1183
- #if defined OCML_BASIC_ROUNDED_OPERATIONS
1184
1209
__DEVICE__
1185
1210
double __fma_ru (double __x, double __y, double __z) {
1186
1211
return __ocml_fma_rtp_f64 (__x, __y, __z);
1187
1212
}
1188
-
1189
1213
__DEVICE__
1190
1214
double __fma_rz (double __x, double __y, double __z) {
1191
1215
return __ocml_fma_rtz_f64 (__x, __y, __z);
1192
1216
}
1217
+ #else
1218
+ __DEVICE__
1219
+ double __fma_rn (double __x, double __y, double __z) {
1220
+ return __ocml_fma_f64 (__x, __y, __z);
1221
+ }
1193
1222
#endif
1194
1223
// END INTRINSICS
1195
1224
// END DOUBLE
0 commit comments