@@ -3043,7 +3043,7 @@ gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
3043
3043
// / simd<T, N> gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
3044
3044
// / simd_mask<N / VS> mask,
3045
3045
// / PropertyListT props = {}); // (acc-ga-2)
3046
- // / Supported platforms: DG2, PVC in most cases. The DG2/PVC is not required if
3046
+ // / Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
3047
3047
// / VS == 1 and no L1/L2 cache hints used and sizeof(T) <= 4 and N = {1,8,16,32}
3048
3048
// /
3049
3049
// / Loads ("gathers") elements of the type 'T' from memory locations addressed
@@ -3111,7 +3111,7 @@ gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
3111
3111
// / typename PropertyListT = empty_properties_t>
3112
3112
// / simd<T, N> gather(AccessorT acc, simd<OffsetT, N / VS> byte_offsets,
3113
3113
// / PropertyListT props = {}); // (acc-ga-3)
3114
- // / Supported platforms: DG2, PVC in most cases. The DG2/PVC is not required if
3114
+ // / Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
3115
3115
// / VS == 1 and no L1/L2 cache hints used and sizeof(T) <= 4 and N = {1,8,16,32}
3116
3116
// /
3117
3117
// / Loads ("gathers") elements of the type 'T' from memory locations addressed
@@ -7389,6 +7389,317 @@ __ESIMD_API
7389
7389
flags);
7390
7390
}
7391
7391
7392
+ // / Variant of gather that uses local accessor as a parameter
7393
+ // / template <typename T, int N, int VS, typename AccessorT,
7394
+ // / typename PropertyListT = empty_properties_t>
7395
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7396
+ // / simd_mask<N / VS> mask, simd<T, N> pass_thru,
7397
+ // / PropertyListT props = {}); // (lacc-ga-1)
7398
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7399
+ // / simd_mask<N / VS> mask,
7400
+ // / PropertyListT props = {}); // (lacc-ga-2)
7401
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7402
+ // / PropertyListT props = {}); // (lacc-ga-3)
7403
+ // /
7404
+ // / The next 3 functions are similar to (lacc-ga-1,2,3), but they don't have
7405
+ // / the template parameter 'VS'. These functions are added for convenience and
7406
+ // / to make it possible for the user to omit the template parameters T and N,
7407
+ // / e.g. 'auto res = gather(acc, byte_offsets);'
7408
+ // / template <typename T, int N, typename AccessorT,
7409
+ // / typename PropertyListT = empty_properties_t>
7410
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7411
+ // / simd_mask<N> mask, simd<T, N> pass_thru,
7412
+ // / PropertyListT props = {}); // (lacc-ga-4)
7413
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7414
+ // / simd_mask<N> mask, PropertyListT props = {});//(lacc-ga-5)
7415
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7416
+ // / PropertyListT props = {}); // (lacc-ga-6)
7417
+ // /
7418
+ // / The next 3 functions are similar to (lacc-ga-1,2,3), but accept the
7419
+ // / \p byte_offsets as a \c simd_view argument:
7420
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
7421
+ // / typename OffsetSimdViewT,
7422
+ // / typename PropertyListT = empty_properties_t>
7423
+ // / simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7424
+ // / simd_mask<N / VS> mask, simd<T, N> pass_thru,
7425
+ // / PropertyListT props = {}); // (lacc-ga-7)
7426
+ // / simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7427
+ // / simd_mask<N / VS> mask,
7428
+ // / PropertyListT props = {}); // (lacc-ga-8)
7429
+ // / simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7430
+ // / PropertyListT props = {}); // (lacc-ga-9)
7431
+
7432
+ // / template <typename T, int N, int VS, typename AccessorT,
7433
+ // / typename PropertyListT = empty_properties_t>
7434
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7435
+ // / simd_mask<N / VS> mask, simd<T, N> pass_thru,
7436
+ // / PropertyListT props = {}); // (lacc-ga-1)
7437
+ // / Supported platforms: DG2, PVC only - Temporary restriction for the variant
7438
+ // / with pass_thru operand. The only exception: DG2/PVC is not required if
7439
+ // / the __ESIMD_GATHER_SCATTER_LLVM_IR macro is used.
7440
+ // /
7441
+ // / Loads ("gathers") elements of the type 'T' from memory locations addressed
7442
+ // / by the local accessor \p acc and byte offsets \p byte_offsets, and returns
7443
+ // / the loaded elements.
7444
+ // / Access to any element's memory location can be disabled via the input vector
7445
+ // / of predicates \p mask. If mask[i] is unset, then the load from
7446
+ // / (acc + byte_offsets[i]) is skipped and the corresponding i-th element from
7447
+ // / the \p pass_thru operand is returned.
7448
+ // / @tparam T Element type.
7449
+ // / @tparam N Number of elements to read.
7450
+ // / @tparam VS Vector size. It can also be read as the number of reads per each
7451
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
7452
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
7453
+ // / @param acc Local accessor referencing the data to load.
7454
+ // / @param byte_offsets The vector of 32-bit offsets in bytes.
7455
+ // / For each i, (acc + byte_offsets[i]) must be element size aligned.
7456
+ // / If the alignment property is not passed, then it is assumed that each
7457
+ // / accessed address is aligned by element-size.
7458
+ // / @param mask The access mask.
7459
+ // / @param pass_thru The vector of values returned for masked-off elements.
7460
+ // / @param props The optional compile-time properties. Only 'alignment'
7461
+ // / property is used.
7462
+ // / @return A vector of elements read.
7463
+ template <typename T, int N, int VS, typename AccessorT,
7464
+ typename PropertyListT =
7465
+ ext::oneapi::experimental::detail::empty_properties_t >
7466
+ __ESIMD_API std::enable_if_t <
7467
+ (detail::is_local_accessor_with_v<AccessorT,
7468
+ detail::accessor_mode_cap::can_read> &&
7469
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7470
+ simd<T, N>>
7471
+ gather (AccessorT acc, simd<uint32_t , N / VS> byte_offsets,
7472
+ simd_mask<N / VS> mask, simd<T, N> pass_thru, PropertyListT props = {}) {
7473
+ return slm_gather<T, N, VS>(byte_offsets +
7474
+ __ESIMD_DNS::localAccessorToOffset (acc),
7475
+ mask, pass_thru, props);
7476
+ }
7477
+
7478
+ // / template <typename T, int N, int VS, typename AccessorT,
7479
+ // / typename PropertyListT = empty_properties_t>
7480
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7481
+ // / simd_mask<N / VS> mask,
7482
+ // / PropertyListT props = {}); // (lacc-ga-2)
7483
+ // / Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
7484
+ // / VS == 1 and the __ESIMD_GATHER_SCATTER_LLVM_IR macro is used or sizeof(T) <=
7485
+ // / 4 and N = {1,2,4,8,16,32}
7486
+ // /
7487
+ // / Loads ("gathers") elements of the type 'T' from memory locations addressed
7488
+ // / by the local accessor \p acc and byte offsets \p byte_offsets, and returns
7489
+ // / the loaded elements.
7490
+ // / Access to any element's memory location can be disabled via the input vector
7491
+ // / of predicates \p mask. If mask[i] is unset, then the load from
7492
+ // / (acc + byte_offsets[i]) is skipped and the corresponding i-th element of
7493
+ // / the returned vector is undefined.
7494
+ // / @tparam T Element type.
7495
+ // / @tparam N Number of elements to read.
7496
+ // / @tparam VS Vector size. It can also be read as the number of reads per each
7497
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
7498
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
7499
+ // / @param acc Local accessor referencing the data to load.
7500
+ // / @param byte_offsets The vector of 32-bit offsets in bytes.
7501
+ // / For each i, (acc + byte_offsets[i]) must be element size aligned.
7502
+ // / If the alignment property is not passed, then it is assumed that each
7503
+ // / accessed address is aligned by element-size.
7504
+ // / @param mask The access mask.
7505
+ // / @param props The optional compile-time properties. Only 'alignment'
7506
+ // / property is used.
7507
+ // / @return A vector of elements read.
7508
+ template <typename T, int N, int VS, typename AccessorT,
7509
+ typename PropertyListT =
7510
+ ext::oneapi::experimental::detail::empty_properties_t >
7511
+ __ESIMD_API std::enable_if_t <
7512
+ (detail::is_local_accessor_with_v<AccessorT,
7513
+ detail::accessor_mode_cap::can_read> &&
7514
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7515
+ simd<T, N>>
7516
+ gather (AccessorT acc, simd<uint32_t , N / VS> byte_offsets,
7517
+ simd_mask<N / VS> mask, PropertyListT props = {}) {
7518
+ return slm_gather<T, N, VS>(
7519
+ byte_offsets + __ESIMD_DNS::localAccessorToOffset (acc), mask, props);
7520
+ }
7521
+
7522
+ // / template <typename T, int N, int VS, typename AccessorT,
7523
+ // / typename PropertyListT = empty_properties_t>
7524
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N / VS> byte_offsets,
7525
+ // / PropertyListT props = {}); // (lacc-ga-3)
7526
+ // / Supported platforms: DG2, PVC in most cases. DG2/PVC is not required if
7527
+ // / VS == 1 and the __ESIMD_GATHER_SCATTER_LLVM_IR macro is used or sizeof(T) <=
7528
+ // / 4 and N = {1,2,4,8,16,32}
7529
+ // /
7530
+ // / Loads ("gathers") elements of the type 'T' from memory locations addressed
7531
+ // / by the local accessor \p acc and byte offsets \p byte_offsets, and returns
7532
+ // / the loaded elements.
7533
+ // / @tparam T Element type.
7534
+ // / @tparam N Number of elements to read.
7535
+ // / @tparam VS Vector size. It can also be read as the number of reads per each
7536
+ // / address. The parameter 'N' must be divisible by 'VS'. (VS > 1) is supported
7537
+ // / only on DG2 and PVC and only for 4- and 8-byte element vectors.
7538
+ // / @param acc Local accessor referencing the data to load.
7539
+ // / @param byte_offsets The vector of 32-bit offsets in bytes.
7540
+ // / For each i, (acc + byte_offsets[i]) must be element size aligned.
7541
+ // / If the alignment property is not passed, then it is assumed that each
7542
+ // / accessed address is aligned by element-size.
7543
+ // / @param props The optional compile-time properties. Only 'alignment'
7544
+ // / property is used.
7545
+ // / @return A vector of elements read.
7546
+ template <typename T, int N, int VS, typename AccessorT,
7547
+ typename PropertyListT =
7548
+ ext::oneapi::experimental::detail::empty_properties_t >
7549
+ __ESIMD_API std::enable_if_t <
7550
+ (detail::is_local_accessor_with_v<AccessorT,
7551
+ detail::accessor_mode_cap::can_read> &&
7552
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7553
+ simd<T, N>>
7554
+ gather (AccessorT acc, simd<uint32_t , N / VS> byte_offsets,
7555
+ PropertyListT props = {}) {
7556
+ return slm_gather<T, N, VS>(
7557
+ byte_offsets + __ESIMD_DNS::localAccessorToOffset (acc), props);
7558
+ }
7559
+
7560
+ // / template <typename T, int N, typename AccessorT,
7561
+ // / typename PropertyListT = empty_properties_t>
7562
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7563
+ // / simd_mask<N> mask, simd<T, N> pass_thru,
7564
+ // / PropertyListT props = {}); // (lacc-ga-4)
7565
+ // / This function is identical to (lacc-ga-1) except that the vector size is
7566
+ // / fixed to 1. This variant is added for convenience and lets the user omit the
7567
+ // / template arguments and call the function as 'gather(acc, byte_offsets, mask,
7568
+ // / pass_thru);'.
7569
+ // Dev note: the mask type was turned into the template parameter `MaskT` to
7570
+ // avoid conflicts between this prototype and the old gather() function
7571
+ // accepting a 'global_offset' parameter, and to avoid 'ambiguous call' errors
7572
+ // for calls like this: gather(acc, byte_offsets_simd, 0, mask);
7573
+ template <typename T, int N, typename AccessorT, typename MaskT,
7574
+ typename PropertyListT =
7575
+ ext::oneapi::experimental::detail::empty_properties_t >
7576
+ __ESIMD_API std::enable_if_t <
7577
+ (detail::is_local_accessor_with_v<AccessorT,
7578
+ detail::accessor_mode_cap::can_read> &&
7579
+ std::is_same_v<MaskT, simd_mask<N>> &&
7580
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7581
+ simd<T, N>>
7582
+ gather (AccessorT acc, simd<uint32_t , N> byte_offsets, MaskT mask,
7583
+ simd<T, N> pass_thru, PropertyListT props = {}) {
7584
+ return slm_gather<T, N>(byte_offsets +
7585
+ __ESIMD_DNS::localAccessorToOffset (acc),
7586
+ mask, pass_thru, props);
7587
+ }
7588
+
7589
+ // / template <typename T, int N, typename AccessorT,
7590
+ // / typename PropertyListT = empty_properties_t>
7591
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7592
+ // / simd_mask<N> mask, PropertyListT props = {}); // (lacc-ga-5)
7593
+ // / This function is identical to (lacc-ga-2) except that the vector size is
7594
+ // / fixed to 1. This variant is added for convenience and lets the user omit the
7595
+ // / template arguments and call the function as 'gather(acc, byte_offsets, mask);'.
7596
+ // Dev note: the mask type was turned into the template parameter `MaskT` to
7597
+ // avoid conflicts between this prototype and the old gather() function
7598
+ // accepting a 'global_offset' parameter, and to avoid 'ambiguous call' errors
7599
+ // for calls like this: gather(acc, byte_offsets_simd, 0);
7600
+ template <typename T, int N, typename AccessorT, typename MaskT,
7601
+ typename PropertyListT =
7602
+ ext::oneapi::experimental::detail::empty_properties_t >
7603
+ __ESIMD_API std::enable_if_t <
7604
+ (detail::is_local_accessor_with_v<AccessorT,
7605
+ detail::accessor_mode_cap::can_read> &&
7606
+ std::is_same_v<MaskT, simd_mask<N>> &&
7607
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7608
+ simd<T, N>>
7609
+ gather (AccessorT acc, simd<uint32_t , N> byte_offsets, MaskT mask,
7610
+ PropertyListT props = {}) {
7611
+ return slm_gather<T, N>(
7612
+ byte_offsets + __ESIMD_DNS::localAccessorToOffset (acc), mask, props);
7613
+ }
7614
+
7615
+ // / template <typename T, int N, typename AccessorT,
7616
+ // / typename PropertyListT = empty_properties_t>
7617
+ // / simd<T, N> gather(AccessorT acc, simd<uint32_t, N> byte_offsets,
7618
+ // / PropertyListT props = {}); // (lacc-ga-6)
7619
+ // / This function is identical to (lacc-ga-3) except that the vector size is
7620
+ // / fixed to 1. This variant is added for convenience and lets the user omit the
7621
+ // / template arguments and call the function as 'gather(acc, byte_offsets);'.
7622
+ template <typename T, int N, typename AccessorT,
7623
+ typename PropertyListT =
7624
+ ext::oneapi::experimental::detail::empty_properties_t >
7625
+ __ESIMD_API std::enable_if_t <
7626
+ (detail::is_local_accessor_with_v<AccessorT,
7627
+ detail::accessor_mode_cap::can_read> &&
7628
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7629
+ simd<T, N>>
7630
+ gather (AccessorT acc, simd<uint32_t , N> byte_offsets,
7631
+ PropertyListT props = {}) {
7632
+ return slm_gather<T, N>(
7633
+ byte_offsets + __ESIMD_DNS::localAccessorToOffset (acc), props);
7634
+ }
7635
+
7636
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
7637
+ // / typename OffsetSimdViewT,
7638
+ // / typename PropertyListT = empty_properties_t>
7639
+ // / simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7640
+ // / simd_mask<N / VS> mask, simd<T, N> pass_thru,
7641
+ // / PropertyListT props = {}); // (lacc-ga-7)
7642
+ // / This function is identical to (lacc-ga-1) except that \p byte_offsets
7643
+ // / is passed as a \c simd_view; its read() result is forwarded to gather<T, N, VS>.
7644
+ template <typename T, int N, int VS = 1 , typename AccessorT,
7645
+ typename OffsetSimdViewT,
7646
+ typename PropertyListT =
7647
+ ext::oneapi::experimental::detail::empty_properties_t >
7648
+ __ESIMD_API std::enable_if_t <
7649
+ (detail::is_local_accessor_with_v<AccessorT,
7650
+ detail::accessor_mode_cap::can_read> &&
7651
+ detail::is_simd_view_type_v<OffsetSimdViewT> &&
7652
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7653
+ simd<T, N>>
7654
+ gather (AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
7655
+ simd<T, N> pass_thru, PropertyListT props = {}) {
7656
+ return gather<T, N, VS>(acc, byte_offsets.read (), mask, pass_thru, props);
7657
+ }
7658
+
7659
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
7660
+ // / typename OffsetSimdViewT,
7661
+ // / typename PropertyListT = empty_properties_t>
7662
+ // / simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7663
+ // / simd_mask<N / VS> mask,
7664
+ // / PropertyListT props = {}); // (lacc-ga-8)
7665
+ // / This function is identical to (lacc-ga-2) except that \p byte_offsets
7666
+ // / is passed as a \c simd_view; its read() result is forwarded to gather<T, N, VS>.
7667
+ template <typename T, int N, int VS = 1 , typename AccessorT,
7668
+ typename OffsetSimdViewT,
7669
+ typename PropertyListT =
7670
+ ext::oneapi::experimental::detail::empty_properties_t >
7671
+ __ESIMD_API std::enable_if_t <
7672
+ (detail::is_local_accessor_with_v<AccessorT,
7673
+ detail::accessor_mode_cap::can_read> &&
7674
+ detail::is_simd_view_type_v<OffsetSimdViewT> &&
7675
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7676
+ simd<T, N>>
7677
+ gather (AccessorT acc, OffsetSimdViewT byte_offsets, simd_mask<N / VS> mask,
7678
+ PropertyListT props = {}) {
7679
+ return gather<T, N, VS>(acc, byte_offsets.read (), mask, props);
7680
+ }
7681
+
7682
+ // / template <typename T, int N, int VS = 1, typename AccessorT,
7683
+ // / typename OffsetSimdViewT,
7684
+ // / typename PropertyListT = empty_properties_t>
7685
+ // / simd<T, N> gather(AccessorT acc, OffsetSimdViewT byte_offsets,
7686
+ // / PropertyListT props = {}); // (lacc-ga-9)
7687
+ // / This function is identical to (lacc-ga-3) except that \p byte_offsets
7688
+ // / is passed as a \c simd_view; its read() result is forwarded to gather<T, N, VS>.
7689
+ template <typename T, int N, int VS = 1 , typename AccessorT,
7690
+ typename OffsetSimdViewT,
7691
+ typename PropertyListT =
7692
+ ext::oneapi::experimental::detail::empty_properties_t >
7693
+ __ESIMD_API std::enable_if_t <
7694
+ (detail::is_local_accessor_with_v<AccessorT,
7695
+ detail::accessor_mode_cap::can_read> &&
7696
+ detail::is_simd_view_type_v<OffsetSimdViewT> &&
7697
+ ext::oneapi::experimental::is_property_list_v<PropertyListT>),
7698
+ simd<T, N>>
7699
+ gather (AccessorT acc, OffsetSimdViewT byte_offsets, PropertyListT props = {}) {
7700
+ return gather<T, N, VS>(acc, byte_offsets.read (), props);
7701
+ }
7702
+
7392
7703
// / Variant of gather that uses local accessor as a parameter
7393
7704
// /
7394
7705
// / Collects elements located at given offsets in an accessor and returns them
@@ -7411,7 +7722,7 @@ __ESIMD_API
7411
7722
std::enable_if_t <detail::is_local_accessor_with_v<
7412
7723
AccessorTy, detail::accessor_mode_cap::can_read>,
7413
7724
simd<T, N>>
7414
- gather (AccessorTy acc, simd<uint32_t , N> offsets, uint32_t glob_offset = 0 ,
7725
+ gather (AccessorTy acc, simd<uint32_t , N> offsets, uint32_t glob_offset,
7415
7726
simd_mask<N> mask = 1 ) {
7416
7727
return slm_gather<T, N>(
7417
7728
offsets + glob_offset + __ESIMD_DNS::localAccessorToOffset (acc), mask);
0 commit comments