@@ -993,51 +993,20 @@ __ESIMD_API simd<Tx, N> atomic_update(Tx *p, simd<Toffset, N> offset,
993
993
}
994
994
}
995
995
996
- // / @anchor usm_atomic_update0
997
- // / @brief No-argument variant of the atomic update operation.
998
- // /
999
- // / Atomically updates \c N memory locations represented by a USM pointer and
1000
- // / a vector of offsets relative to the pointer, and returns a vector of old
1001
- // / values found at the memory locations before update. The update operation
1002
- // / has no arguments in addition to the value at the memory location.
1003
- // /
1004
- // / @tparam Op The atomic operation - can be \c atomic_op::inc or
1005
- // / \c atomic_op::dec, \c atomic_op::load.
1006
- // / @tparam Tx The vector element type.
1007
- // / @tparam N The number of memory locations to update.
1008
- // / @param p The USM pointer.
1009
- // / @param offset The vector of 32-bit or 64-bit offsets in bytes.
1010
- // / @param mask Operation mask, only locations with non-zero in the
1011
- // / corresponding mask element are updated.
1012
- // / @return A vector of the old values at the memory locations before the
1013
- // / update.
1014
- // /
1015
- template <atomic_op Op, typename Tx, int N, typename Toffset>
1016
- __ESIMD_API simd<Tx, N> atomic_update (Tx *p, simd<Toffset, N> offset,
1017
- simd_mask<N> mask) {
1018
- static_assert (std::is_integral_v<Toffset>, " Unsupported offset type" );
1019
- detail::check_atomic<Op, Tx, N, 0 >();
1020
- if constexpr (Op == atomic_op::load) {
1021
- return atomic_update<atomic_op::bit_or, Tx, N>(p, offset, simd<Tx, N>(0 ),
1022
- mask);
1023
- } else {
1024
- simd<uintptr_t , N> vAddr (reinterpret_cast <uintptr_t >(p));
1025
- simd<uintptr_t , N> offset_i1 = convert<uintptr_t >(offset);
1026
- vAddr += offset_i1;
1027
- using T = typename detail::__raw_t <Tx>;
1028
- return __esimd_svm_atomic0<Op, T, N>(vAddr.data (), mask.data ());
1029
- }
1030
- }
1031
-
1032
996
// / A variation of \c atomic_update API with \c offsets represented as
1033
997
// / \c simd_view object.
1034
998
// /
1035
- // / @tparam Op The atomic operation - can be \c atomic_op::inc or
1036
- // / \c atomic_op::dec, \c atomic_op::load.
999
+ // / @tparam Op The atomic operation - can be one of the following:
1000
+ // / \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c
1001
+ // / atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c
1002
+ // / atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c
1003
+ // / atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c
1004
+ // / atomic_op::store.
1037
1005
// / @tparam Tx The vector element type.
1038
1006
// / @tparam N The number of memory locations to update.
1039
1007
// / @param p The USM pointer.
1040
1008
// / @param offsets The simd_view of 32-bit or 64-bit offsets in bytes.
1009
+ // / @param src0 The additional argument.
1041
1010
// / @param mask Operation mask, only locations with non-zero in the
1042
1011
// / corresponding mask element are updated.
1043
1012
// / @return A vector of the old values at the memory locations before the
@@ -1047,89 +1016,80 @@ template <atomic_op Op, typename Tx, int N, typename Toffset,
1047
1016
typename RegionTy = region1d_t <Toffset, N, 1 >>
1048
1017
__ESIMD_API simd<Tx, N> atomic_update (Tx *p,
1049
1018
simd_view<Toffset, RegionTy> offsets,
1050
- simd_mask< N> mask = 1 ) {
1019
+ simd<Tx, N> src0, simd_mask<N> mask ) {
1051
1020
using Ty = typename simd_view<Toffset, RegionTy>::element_type;
1052
- return atomic_update<Op, Tx, N>(p, simd<Ty, N>(offsets), mask);
1021
+ return atomic_update<Op, Tx, N>(p, simd<Ty, N>(offsets), src0, mask);
1053
1022
}
1054
1023
1055
1024
// / A variation of \c atomic_update API with \c offset represented as
1056
- // / scalar.
1025
+ // / scalar object.
1057
1026
// /
1058
- // / @tparam Op The atomic operation - can be \c atomic_op::inc or
1059
- // / \c atomic_op::dec, \c atomic_op::load.
1027
+ // / @tparam Op The atomic operation - can be one of the following:
1028
+ // / \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c atomic_op::max,
1029
+ // / \c atomic_op::xchg, \c atomic_op::bit_and, \c atomic_op::bit_or,
1030
+ // / \c atomic_op::bit_xor, \c atomic_op::minsint, \c atomic_op::maxsint,
1031
+ // / \c atomic_op::fmax, \c atomic_op::fmin, \c atomic_op::store.
1060
1032
// / @tparam Tx The vector element type.
1061
1033
// / @tparam N The number of memory locations to update.
1062
1034
// / @param p The USM pointer.
1063
- // / @param offset The scalar 32-bit or 64-bit offset in bytes.
1035
+ // / @param offset The scalar 32-bit or 64-bit offset in bytes.
1036
+ // / @param src0 The additional argument.
1064
1037
// / @param mask Operation mask, only locations with non-zero in the
1065
1038
// / corresponding mask element are updated.
1066
1039
// / @return A vector of the old values at the memory locations before the
1067
1040
// / update.
1068
1041
// /
1069
1042
template <atomic_op Op, typename Tx, int N, typename Toffset>
1070
1043
__ESIMD_API std::enable_if_t <std::is_integral_v<Toffset>, simd<Tx, N>>
1071
- atomic_update (Tx *p, Toffset offset, simd_mask< N> mask = 1 ) {
1072
- return atomic_update<Op, Tx, N>(p, simd<Toffset, N>(offset), mask);
1044
+ atomic_update (Tx *p, Toffset offset, simd<Tx, N> src0, simd_mask<N> mask ) {
1045
+ return atomic_update<Op, Tx, N>(p, simd<Toffset, N>(offset), src0, mask);
1073
1046
}
1074
1047
1075
- // / @anchor usm_atomic_update1
1076
- // / @brief Single -argument variant of the atomic update operation.
1048
+ // / @anchor usm_atomic_update0
1049
+ // / @brief No-argument variant of the atomic update operation.
1077
1050
// /
1078
1051
// / Atomically updates \c N memory locations represented by a USM pointer and
1079
1052
// / a vector of offsets relative to the pointer, and returns a vector of old
1080
1053
// / values found at the memory locations before update. The update operation
1081
- // / has 1 additional argument .
1054
+ // / has no arguments in addition to the value at the memory location.
1082
1055
// /
1083
- // / @tparam Op The atomic operation - can be one of the following:
1084
- // / \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c atomic_op::max,
1085
- // / \c atomic_op::xchg, \c atomic_op::bit_and, \c atomic_op::bit_or,
1086
- // / \c atomic_op::bit_xor, \c atomic_op::minsint, \c atomic_op::maxsint,
1087
- // / \c atomic_op::fmax, \c atomic_op::fmin.
1056
+ // / @tparam Op The atomic operation - can be \c atomic_op::inc or
1057
+ // / \c atomic_op::dec, \c atomic_op::load.
1088
1058
// / @tparam Tx The vector element type.
1089
1059
// / @tparam N The number of memory locations to update.
1090
1060
// / @param p The USM pointer.
1091
1061
// / @param offset The vector of 32-bit or 64-bit offsets in bytes.
1092
- // / @param src0 The additional argument.
1093
1062
// / @param mask Operation mask, only locations with non-zero in the
1094
1063
// / corresponding mask element are updated.
1095
1064
// / @return A vector of the old values at the memory locations before the
1096
1065
// / update.
1097
1066
// /
1098
1067
template <atomic_op Op, typename Tx, int N, typename Toffset>
1099
1068
__ESIMD_API simd<Tx, N> atomic_update (Tx *p, simd<Toffset, N> offset,
1100
- simd<Tx, N> src0, simd_mask<N> mask) {
1069
+ simd_mask<N> mask) {
1101
1070
static_assert (std::is_integral_v<Toffset>, " Unsupported offset type" );
1102
- if constexpr ((Op == atomic_op::fmin) || (Op == atomic_op::fmax) ||
1103
- (Op == atomic_op::fadd) || (Op == atomic_op::fsub)) {
1104
- // Auto-convert FP atomics to LSC version. Warning is given - see enum.
1105
- return atomic_update<detail::to_lsc_atomic_op<Op>(), Tx, N>(p, offset, src0,
1106
- mask);
1071
+ detail::check_atomic<Op, Tx, N, 0 >();
1072
+ if constexpr (Op == atomic_op::load) {
1073
+ return atomic_update<atomic_op::bit_or, Tx, N>(p, offset, simd<Tx, N>(0 ),
1074
+ mask);
1107
1075
} else {
1108
- detail::check_atomic<Op, Tx, N, 1 >();
1109
1076
simd<uintptr_t , N> vAddr (reinterpret_cast <uintptr_t >(p));
1110
1077
simd<uintptr_t , N> offset_i1 = convert<uintptr_t >(offset);
1111
1078
vAddr += offset_i1;
1112
-
1113
1079
using T = typename detail::__raw_t <Tx>;
1114
- return __esimd_svm_atomic1<Op, T, N>(vAddr.data (), src0.data (),
1115
- mask.data ());
1080
+ return __esimd_svm_atomic0<Op, T, N>(vAddr.data (), mask.data ());
1116
1081
}
1117
1082
}
1118
1083
1119
1084
// / A variation of \c atomic_update API with \c offsets represented as
1120
1085
// / \c simd_view object.
1121
1086
// /
1122
- // / @tparam Op The atomic operation - can be one of the following:
1123
- // / \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c
1124
- // / atomic_op::max, \c atomic_op::xchg, \c atomic_op::bit_and, \c
1125
- // / atomic_op::bit_or, \c atomic_op::bit_xor, \c atomic_op::minsint, \c
1126
- // / atomic_op::maxsint, \c atomic_op::fmax, \c atomic_op::fmin, \c
1127
- // / atomic_op::store.
1087
+ // / @tparam Op The atomic operation - can be \c atomic_op::inc or
1088
+ // / \c atomic_op::dec, \c atomic_op::load.
1128
1089
// / @tparam Tx The vector element type.
1129
1090
// / @tparam N The number of memory locations to update.
1130
1091
// / @param p The USM pointer.
1131
1092
// / @param offsets The simd_view of 32-bit or 64-bit offsets in bytes.
1132
- // / @param src0 The additional argument.
1133
1093
// / @param mask Operation mask, only locations with non-zero in the
1134
1094
// / corresponding mask element are updated.
1135
1095
// / @return A vector of the old values at the memory locations before the
@@ -1139,33 +1099,29 @@ template <atomic_op Op, typename Tx, int N, typename Toffset,
1139
1099
typename RegionTy = region1d_t <Toffset, N, 1 >>
1140
1100
__ESIMD_API simd<Tx, N> atomic_update (Tx *p,
1141
1101
simd_view<Toffset, RegionTy> offsets,
1142
- simd<Tx, N> src0, simd_mask<N> mask) {
1102
+ simd_mask<N> mask = 1 ) {
1143
1103
using Ty = typename simd_view<Toffset, RegionTy>::element_type;
1144
- return atomic_update<Op, Tx, N>(p, simd<Ty, N>(offsets), src0, mask);
1104
+ return atomic_update<Op, Tx, N>(p, simd<Ty, N>(offsets), mask);
1145
1105
}
1146
1106
1147
1107
// / A variation of \c atomic_update API with \c offset represented as
1148
- // / scalar object .
1108
+ // / scalar.
1149
1109
// /
1150
- // / @tparam Op The atomic operation - can be one of the following:
1151
- // / \c atomic_op::add, \c atomic_op::sub, \c atomic_op::min, \c atomic_op::max,
1152
- // / \c atomic_op::xchg, \c atomic_op::bit_and, \c atomic_op::bit_or,
1153
- // / \c atomic_op::bit_xor, \c atomic_op::minsint, \c atomic_op::maxsint,
1154
- // / \c atomic_op::fmax, \c atomic_op::fmin.
1110
+ // / @tparam Op The atomic operation - can be \c atomic_op::inc or
1111
+ // / \c atomic_op::dec, \c atomic_op::load.
1155
1112
// / @tparam Tx The vector element type.
1156
1113
// / @tparam N The number of memory locations to update.
1157
1114
// / @param p The USM pointer.
1158
- // / @param offset The scalar 32-bit or 64-bit offsets in bytes.
1159
- // / @param src0 The additional argument.
1115
+ // / @param offset The scalar 32-bit or 64-bit offset in bytes.
1160
1116
// / @param mask Operation mask, only locations with non-zero in the
1161
1117
// / corresponding mask element are updated.
1162
1118
// / @return A vector of the old values at the memory locations before the
1163
1119
// / update.
1164
1120
// /
1165
1121
template <atomic_op Op, typename Tx, int N, typename Toffset>
1166
1122
__ESIMD_API std::enable_if_t <std::is_integral_v<Toffset>, simd<Tx, N>>
1167
- atomic_update (Tx *p, Toffset offset, simd<Tx, N> src0, simd_mask<N> mask) {
1168
- return atomic_update<Op, Tx, N>(p, simd<Toffset, N>(offset), src0, mask);
1123
+ atomic_update (Tx *p, Toffset offset, simd_mask<N> mask = 1 ) {
1124
+ return atomic_update<Op, Tx, N>(p, simd<Toffset, N>(offset), mask);
1169
1125
}
1170
1126
1171
1127
// / @anchor usm_atomic_update2
0 commit comments