#ifndef KMP_STATIC_STEAL_ENABLED
#define KMP_STATIC_STEAL_ENABLED 1
#endif
+ #define KMP_WEIGHTED_ITERATIONS_SUPPORTED \
+ (KMP_AFFINITY_SUPPORTED && KMP_STATIC_STEAL_ENABLED && \
+ (KMP_ARCH_X86 || KMP_ARCH_X86_64))

#define TASK_CURRENT_NOT_QUEUED 0
#define TASK_CURRENT_QUEUED 1
@@ -881,14 +884,8 @@ typedef struct kmp_affinity_flags_t {
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);

typedef struct kmp_affinity_ids_t {
+ int os_id;
int ids[KMP_HW_LAST];
- int operator[](size_t idx) const { return ids[idx]; }
- int &operator[](size_t idx) { return ids[idx]; }
- kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
- for (int i = 0; i < KMP_HW_LAST; ++i)
- ids[i] = rhs[i];
- return *this;
- }
} kmp_affinity_ids_t;

typedef struct kmp_affinity_attrs_t {
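With the subscript and copy-assignment operators dropped, kmp_affinity_ids_t above is now plain old data: the compiler-generated assignment performs the memberwise copy, and callers index the ids array directly. A minimal usage sketch, not taken from the patch (the KMP_HW_CORE index and the literal values are illustrative):

// Sketch only: assumes the kmp_hw_t enumerators (KMP_HW_CORE, KMP_HW_LAST) from kmp.h.
kmp_affinity_ids_t a = {};   // aggregate zero-initialization
a.os_id = 3;                 // new field introduced by this patch
a.ids[KMP_HW_CORE] = 7;      // index the array member directly (no operator[] anymore)
kmp_affinity_ids_t b = a;    // trivial memberwise copy replaces the removed operator=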
@@ -938,6 +935,10 @@ extern kmp_affin_mask_t *__kmp_affin_fullMask;
extern kmp_affin_mask_t *__kmp_affin_origMask;
extern char *__kmp_cpuinfo_file;

+ #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+ extern int __kmp_first_osid_with_ecore;
+ #endif
+
#endif /* KMP_AFFINITY_SUPPORTED */

// This needs to be kept in sync with the values in omp.h !!!
@@ -1849,12 +1850,9 @@ typedef struct kmp_sched_flags {
unsigned ordered : 1;
unsigned nomerge : 1;
unsigned contains_last : 1;
- #if KMP_USE_HIER_SCHED
- unsigned use_hier : 1;
- unsigned unused : 28;
- #else
- unsigned unused : 29;
- #endif
+ unsigned use_hier : 1; // Used in KMP_USE_HIER_SCHED code
+ unsigned use_hybrid : 1; // Used in KMP_WEIGHTED_ITERATIONS_SUPPORTED code
+ unsigned unused : 27;
} kmp_sched_flags_t;

KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
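The reworked bitfield keeps use_hier unconditionally, adds use_hybrid, and shrinks the spare bits to 27, so the total is still 1 + 1 + 1 + 1 + 1 + 27 = 32 bits and the KMP_BUILD_ASSERT above continues to hold. A standalone sketch of that size check, not part of the patch:

// Sketch: mirrors the new kmp_sched_flags layout to show it still fits one 32-bit word.
typedef struct {
  unsigned ordered : 1;
  unsigned nomerge : 1;
  unsigned contains_last : 1;
  unsigned use_hier : 1;   // read only by KMP_USE_HIER_SCHED code paths
  unsigned use_hybrid : 1; // read only by KMP_WEIGHTED_ITERATIONS_SUPPORTED code paths
  unsigned unused : 27;    // 5 used bits + 27 spare = 32 bits
} sched_flags_sketch_t;
static_assert(sizeof(sched_flags_sketch_t) == 4, "flags must stay one 32-bit word");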
@@ -1868,26 +1866,37 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
kmp_int32 st;
kmp_int32 tc;
kmp_lock_t *steal_lock; // lock used for chunk stealing
+
+ kmp_uint32 ordered_lower;
+ kmp_uint32 ordered_upper;
+
// KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
// a) parm3 is properly aligned and
// b) all parm1-4 are on the same cache line.
// Because of parm1-4 are used together, performance seems to be better
// if they are on the same cache line (not measured though).

- struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
- kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
- kmp_int32 parm2; // make no real change at least while padding is off.
+ struct KMP_ALIGN(32) {
+ kmp_int32 parm1;
+ kmp_int32 parm2;
kmp_int32 parm3;
kmp_int32 parm4;
};

- kmp_uint32 ordered_lower;
- kmp_uint32 ordered_upper;
+ #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+ kmp_uint32 pchunks;
+ kmp_uint32 num_procs_with_pcore;
+ kmp_int32 first_thread_with_ecore;
+ #endif
#if KMP_OS_WINDOWS
kmp_int32 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info32_t;

+ #if CACHE_LINE <= 128
+ KMP_BUILD_ASSERT(sizeof(dispatch_private_info32_t) <= 128);
+ #endif
+
typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
kmp_int64 count; // current chunk number for static & static-steal scheduling
kmp_int64 ub; /* upper-bound */
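The CACHE_LINE guard added above keeps dispatch_private_info32_t within a single 128-byte line now that ordered_lower/ordered_upper sit ahead of the 32-byte-aligned parm block and the three weighted-iteration fields were added; the same guard follows the 64-bit variant below. Rough LP64 layout arithmetic as a hedged sanity check (padding assumed, not an ABI guarantee):

// Approximate dispatch_private_info32_t layout on an LP64 build:
//   count, ub, lb, st, tc            5 * 4 = 20 bytes
//   pad + steal_lock pointer         4 + 8 = 12 bytes -> offset 32
//   ordered_lower, ordered_upper     2 * 4 =  8 bytes -> offset 40
//   pad to the KMP_ALIGN(32) block          24 bytes -> offset 64
//   parm1-4 block (sized to 32)             32 bytes -> offset 96
//   pchunks, num_procs_with_pcore,
//   first_thread_with_ecore          3 * 4 = 12 bytes -> offset 108
//   last_upper (Windows only)                4 bytes -> 112, padded up to 128
// so the struct stays within one 128-byte cache line, which the
// KMP_BUILD_ASSERT enforces whenever CACHE_LINE <= 128.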
@@ -1896,27 +1905,38 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
kmp_int64 st; /* stride */
kmp_int64 tc; /* trip count (number of iterations) */
kmp_lock_t *steal_lock; // lock used for chunk stealing
+
+ kmp_uint64 ordered_lower;
+ kmp_uint64 ordered_upper;
/* parm[1-4] are used in different ways by different scheduling algorithms */

- // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+ // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
// a) parm3 is properly aligned and
// b) all parm1-4 are in the same cache line.
// Because of parm1-4 are used together, performance seems to be better
// if they are in the same line (not measured though).
-
struct KMP_ALIGN(32) {
kmp_int64 parm1;
kmp_int64 parm2;
kmp_int64 parm3;
kmp_int64 parm4;
};

- kmp_uint64 ordered_lower;
- kmp_uint64 ordered_upper;
+ #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+ kmp_uint64 pchunks;
+ kmp_uint64 num_procs_with_pcore;
+ kmp_int64 first_thread_with_ecore;
+ #endif
+
#if KMP_OS_WINDOWS
kmp_int64 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info64_t;
+
+ #if CACHE_LINE <= 128
+ KMP_BUILD_ASSERT(sizeof(dispatch_private_info64_t) <= 128);
+ #endif
+
#else /* KMP_STATIC_STEAL_ENABLED */
typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
kmp_int32 lb;
@@ -3862,6 +3882,9 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
+ #if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+ extern int __kmp_get_first_osid_with_ecore(void);
+ #endif
#if KMP_OS_LINUX || KMP_OS_FREEBSD
extern int kmp_set_thread_affinity_mask_initial(void);
#endif
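A hedged sketch of how the new query and the cached global declared earlier might fit together; the helper name and the caching pattern are illustrative, not taken from this hunk:

#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
// Sketch only: cache the OS id of the first efficiency core once, then let the
// weighted-iteration dispatch code consult it. A negative value is assumed here
// to mean "no E-core found"; the runtime's real sentinel is not shown in this hunk.
static void sketch_cache_first_ecore_osid(void) {
  __kmp_first_osid_with_ecore = __kmp_get_first_osid_with_ecore();
  if (__kmp_first_osid_with_ecore < 0) {
    // non-hybrid machine (or detection failed): use_hybrid stays 0 and the
    // regular static-steal path is used
  }
}
#endif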