@@ -4089,15 +4089,22 @@ slm_atomic_update_impl(simd<uint32_t, N> offsets, simd<T, N> src0,
4089
4089
constexpr lsc_data_size EDS = expand_data_size (finalize_data_size<T, DS>());
4090
4090
constexpr lsc_vector_size VS = to_lsc_vector_size<1 >();
4091
4091
constexpr lsc_data_order Transposed = lsc_data_order::nontranspose;
4092
- using MsgT = typename lsc_expand_type<T>::type;
4093
4092
constexpr int IOp = lsc_to_internal_atomic_op<T, Op>();
4094
- simd<MsgT, N> Msg_data = lsc_format_input<MsgT>(src0);
4095
- simd<MsgT, N> Tmp =
4096
- __esimd_lsc_xatomic_slm_1<MsgT, IOp, cache_hint::none, cache_hint::none,
4097
- AddressScale, ImmOffset, EDS, VS, Transposed,
4098
- N>(pred.data (), offsets.data (),
4099
- Msg_data.data ());
4100
- return lsc_format_ret<T>(Tmp);
4093
+ if constexpr (std::is_same_v<T, double >) {
4094
+ return __esimd_lsc_xatomic_slm_1<T, IOp, cache_hint::none, cache_hint::none,
4095
+ AddressScale, ImmOffset, EDS, VS,
4096
+ Transposed, N>(pred.data (), offsets.data (),
4097
+ src0.data ());
4098
+ } else {
4099
+ using MsgT = typename lsc_expand_type<T>::type;
4100
+ simd<MsgT, N> Msg_data = lsc_format_input<MsgT>(src0);
4101
+ simd<MsgT, N> Tmp =
4102
+ __esimd_lsc_xatomic_slm_1<MsgT, IOp, cache_hint::none, cache_hint::none,
4103
+ AddressScale, ImmOffset, EDS, VS, Transposed,
4104
+ N>(pred.data (), offsets.data (),
4105
+ Msg_data.data ());
4106
+ return lsc_format_ret<T>(Tmp);
4107
+ }
4101
4108
}
4102
4109
4103
4110
/// SLM atomic.
@@ -4126,16 +4133,23 @@ __ESIMD_API simd<T, N> slm_atomic_update_impl(simd<uint32_t, N> offsets,
4126
4133
constexpr lsc_data_size EDS = expand_data_size (finalize_data_size<T, DS>());
4127
4134
constexpr lsc_vector_size VS = to_lsc_vector_size<1 >();
4128
4135
constexpr lsc_data_order Transposed = lsc_data_order::nontranspose;
4129
- using MsgT = typename lsc_expand_type<T>::type;
4130
4136
constexpr int IOp = lsc_to_internal_atomic_op<T, Op>();
4131
- simd<MsgT, N> Msg_data0 = lsc_format_input<MsgT>(src0);
4132
- simd<MsgT, N> Msg_data1 = lsc_format_input<MsgT>(src1);
4133
- simd<MsgT, N> Tmp =
4134
- __esimd_lsc_xatomic_slm_2<MsgT, IOp, cache_hint::none, cache_hint::none,
4135
- AddressScale, ImmOffset, EDS, VS, Transposed,
4136
- N>(pred.data (), offsets.data (),
4137
- Msg_data0.data (), Msg_data1.data ());
4138
- return lsc_format_ret<T>(Tmp);
4137
+ if constexpr (std::is_same_v<T, double >) {
4138
+ return __esimd_lsc_xatomic_slm_2<T, IOp, cache_hint::none, cache_hint::none,
4139
+ AddressScale, ImmOffset, EDS, VS,
4140
+ Transposed, N>(pred.data (), offsets.data (),
4141
+ src0.data (), src1.data ());
4142
+ } else {
4143
+ using MsgT = typename lsc_expand_type<T>::type;
4144
+ simd<MsgT, N> Msg_data0 = lsc_format_input<MsgT>(src0);
4145
+ simd<MsgT, N> Msg_data1 = lsc_format_input<MsgT>(src1);
4146
+ simd<MsgT, N> Tmp =
4147
+ __esimd_lsc_xatomic_slm_2<MsgT, IOp, cache_hint::none, cache_hint::none,
4148
+ AddressScale, ImmOffset, EDS, VS, Transposed,
4149
+ N>(pred.data (), offsets.data (),
4150
+ Msg_data0.data (), Msg_data1.data ());
4151
+ return lsc_format_ret<T>(Tmp);
4152
+ }
4139
4153
}
4140
4154
4141
4155
} // namespace detail
0 commit comments