Skip to content

Commit acb3b18

Browse files
committed
[OpenMP][host runtime] Add initial hybrid CPU support
Detect the different core types through CPUID.1A and show them to the user through the KMP_AFFINITY=verbose mechanism. Provide __kmp_is_hybrid_cpu() so that future runtime optimizations can tell whether they are running on a hybrid system. Differential Revision: https://reviews.llvm.org/D110435
1 parent b840d3a commit acb3b18

File tree

5 files changed

+120
-1
lines changed

5 files changed

+120
-1
lines changed

openmp/runtime/src/i18n/en_US.txt

+1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ OmptOutdatedWorkshare "OMPT: Cannot determine workshare type; using the d
360360
OmpNoAllocator "Allocator %1$s is not available, will use default allocator."
361361
TopologyGeneric "%1$s: %2$s (%3$d total cores)"
362362
AffGranularityBad "%1$s: granularity setting: %2$s does not exist in topology. Using granularity=%3$s instead."
363+
TopologyHybrid "%1$s: hybrid core type detected: %2$d %3$s cores."
363364

364365
# --- OpenMP errors detected at runtime ---
365366
#

openmp/runtime/src/kmp.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,8 @@ typedef struct kmp_cpuid {
12221222
// One-bit CPU feature flags; the bit-field widths are chosen to total 32 bits.
typedef struct kmp_cpuinfo_flags_t {
12231223
unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.
12241224
unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.
1225-
unsigned reserved : 30; // Ensure size of 32 bits
1225+
unsigned hybrid : 1; // Hybrid bit, CPUID.07:EDX bit 15: 0 if clear, 1 if set.
1226+
unsigned reserved : 29; // Ensure size of 32 bits
12261227
} kmp_cpuinfo_flags_t;
12271228

12281229
typedef struct kmp_cpuinfo {
@@ -2984,6 +2985,9 @@ extern int __kmp_storage_map_verbose_specified;
29842985

29852986
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
29862987
extern kmp_cpuinfo_t __kmp_cpuinfo;
2988+
// Reports whether the hybrid flag recorded in __kmp_cpuinfo is set.
static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }
2989+
#else
2990+
// Fallback for builds that are neither KMP_ARCH_X86 nor KMP_ARCH_X86_64:
// always reports false.
static inline bool __kmp_is_hybrid_cpu() { return false; }
29872991
#endif
29882992

29892993
extern volatile int __kmp_init_serial;

openmp/runtime/src/kmp_affinity.cpp

+75
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,20 @@ const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural) {
123123
return ((plural) ? "unknowns" : "unknown");
124124
}
125125

126+
// Map a core type to the human-readable name used in topology output.
// Any value without a dedicated name (including KMP_HW_CORE_TYPE_UNKNOWN)
// yields "unknown".
const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // The named core types only exist on x86 builds.
  if (type == KMP_HW_CORE_TYPE_ATOM)
    return "Intel Atom(R) processor";
  if (type == KMP_HW_CORE_TYPE_CORE)
    return "Intel(R) Core(TM) processor";
#endif
  return "unknown";
}
139+
126140
////////////////////////////////////////////////////////////////////////////////
127141
// kmp_hw_thread_t methods
128142
int kmp_hw_thread_t::compare_ids(const void *a, const void *b) {
@@ -174,6 +188,9 @@ void kmp_hw_thread_t::print() const {
174188
for (int i = 0; i < depth; ++i) {
175189
printf("%4d ", ids[i]);
176190
}
191+
if (core_type != KMP_HW_CORE_TYPE_UNKNOWN) {
192+
printf(" (%s)", __kmp_hw_get_core_type_string(core_type));
193+
}
177194
printf("\n");
178195
}
179196

@@ -298,13 +315,20 @@ void kmp_topology_t::_set_last_level_cache() {
298315
void kmp_topology_t::_gather_enumeration_information() {
299316
int previous_id[KMP_HW_LAST];
300317
int max[KMP_HW_LAST];
318+
int previous_core_id = kmp_hw_thread_t::UNKNOWN_ID;
301319

302320
for (int i = 0; i < depth; ++i) {
303321
previous_id[i] = kmp_hw_thread_t::UNKNOWN_ID;
304322
max[i] = 0;
305323
count[i] = 0;
306324
ratio[i] = 0;
307325
}
326+
if (__kmp_is_hybrid_cpu()) {
327+
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
328+
core_types_count[i] = 0;
329+
core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
330+
}
331+
}
308332
for (int i = 0; i < num_hw_threads; ++i) {
309333
kmp_hw_thread_t &hw_thread = hw_threads[i];
310334
for (int layer = 0; layer < depth; ++layer) {
@@ -326,6 +350,15 @@ void kmp_topology_t::_gather_enumeration_information() {
326350
for (int layer = 0; layer < depth; ++layer) {
327351
previous_id[layer] = hw_thread.ids[layer];
328352
}
353+
// Figure out the number of each core type for hybrid CPUs
354+
if (__kmp_is_hybrid_cpu()) {
355+
int core_level = get_level(KMP_HW_CORE);
356+
if (core_level != -1) {
357+
if (hw_thread.ids[core_level] != previous_core_id)
358+
_increment_core_type(hw_thread.core_type);
359+
previous_core_id = hw_thread.ids[core_level];
360+
}
361+
}
329362
}
330363
for (int layer = 0; layer < depth; ++layer) {
331364
if (max[layer] > ratio[layer])
@@ -478,6 +511,19 @@ void kmp_topology_t::dump() const {
478511
}
479512
printf("\n");
480513

514+
printf("* core_types:\n");
515+
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
516+
if (core_types[i] != KMP_HW_CORE_TYPE_UNKNOWN) {
517+
printf(" %d %s core%c\n", core_types_count[i],
518+
__kmp_hw_get_core_type_string(core_types[i]),
519+
((core_types_count[i] > 1) ? 's' : ' '));
520+
} else {
521+
if (i == 0)
522+
printf("No hybrid information available\n");
523+
break;
524+
}
525+
}
526+
481527
printf("* equivalent map:\n");
482528
KMP_FOREACH_HW_TYPE(i) {
483529
const char *key = __kmp_hw_get_keyword(i);
@@ -571,6 +617,15 @@ void kmp_topology_t::print(const char *env_var) const {
571617
}
572618
KMP_INFORM(TopologyGeneric, env_var, buf.str, ncores);
573619

620+
if (__kmp_is_hybrid_cpu()) {
621+
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
622+
if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN)
623+
break;
624+
KMP_INFORM(TopologyHybrid, env_var, core_types_count[i],
625+
__kmp_hw_get_core_type_string(core_types[i]));
626+
}
627+
}
628+
574629
if (num_hw_threads <= 0) {
575630
__kmp_str_buf_free(&buf);
576631
return;
@@ -585,6 +640,9 @@ void kmp_topology_t::print(const char *env_var) const {
585640
__kmp_str_buf_print(&buf, "%s ", __kmp_hw_get_catalog_string(type));
586641
__kmp_str_buf_print(&buf, "%d ", hw_threads[i].ids[level]);
587642
}
643+
if (__kmp_is_hybrid_cpu())
644+
__kmp_str_buf_print(
645+
&buf, "(%s)", __kmp_hw_get_core_type_string(hw_threads[i].core_type));
588646
KMP_INFORM(OSProcMapToPack, env_var, hw_threads[i].os_id, buf.str);
589647
}
590648

@@ -1782,6 +1840,16 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
17821840
return true;
17831841
}
17841842

1843+
// Hybrid cpu detection using CPUID.1A
1844+
// Thread should be pinned to processor already
1845+
static void __kmp_get_hybrid_info(kmp_hw_core_type_t *type,
1846+
unsigned *native_model_id) {
1847+
kmp_cpuid buf;
1848+
__kmp_x86_cpuid(0x1a, 0, &buf);
1849+
*type = (kmp_hw_core_type_t)__kmp_extract_bits<24, 31>(buf.eax);
1850+
*native_model_id = __kmp_extract_bits<0, 23>(buf.eax);
1851+
}
1852+
17851853
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
17861854
// architectures support a newer interface for specifying the x2APIC Ids,
17871855
// based on CPUID.B or CPUID.1F
@@ -2051,6 +2119,13 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
20512119
hw_thread.ids[idx] >>= my_levels[j - 1].mask_width;
20522120
}
20532121
}
2122+
// Hybrid information
2123+
if (__kmp_is_hybrid_cpu() && highest_leaf >= 0x1a) {
2124+
kmp_hw_core_type_t type;
2125+
unsigned native_model_id;
2126+
__kmp_get_hybrid_info(&type, &native_model_id);
2127+
hw_thread.core_type = type;
2128+
}
20542129
hw_thread_index++;
20552130
}
20562131
KMP_ASSERT(hw_thread_index > 0);

openmp/runtime/src/kmp_affinity.h

+33
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,17 @@ class KMPNativeAffinity : public KMPAffinity {
598598
#endif /* KMP_OS_WINDOWS */
599599
#endif /* KMP_AFFINITY_SUPPORTED */
600600

601+
// Core type attached to a hardware thread.
// KMP_HW_MAX_NUM_CORE_TYPES is not a core type itself; it is used as the
// length of the per-topology core type arrays and as a loop bound.
typedef enum kmp_hw_core_type_t {
602+
KMP_HW_CORE_TYPE_UNKNOWN = 0x0, // also marks an unused slot in core_types[]
603+
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
604+
// These values are assigned directly from bits 24-31 of CPUID.1A EAX,
// so they must match what the hardware reports.
KMP_HW_CORE_TYPE_ATOM = 0x20,
605+
KMP_HW_CORE_TYPE_CORE = 0x40,
606+
KMP_HW_MAX_NUM_CORE_TYPES = 3,
607+
#else
608+
// No architecture-specific core types: only the unknown entry exists.
KMP_HW_MAX_NUM_CORE_TYPES = 1,
609+
#endif
610+
} kmp_hw_core_type_t;
611+
601612
class kmp_hw_thread_t {
602613
public:
603614
static const int UNKNOWN_ID = -1;
@@ -607,11 +618,14 @@ class kmp_hw_thread_t {
607618
int sub_ids[KMP_HW_LAST];
608619
bool leader;
609620
int os_id;
621+
kmp_hw_core_type_t core_type;
622+
610623
void print() const;
611624
void clear() {
612625
for (int i = 0; i < (int)KMP_HW_LAST; ++i)
613626
ids[i] = UNKNOWN_ID;
614627
leader = false;
628+
core_type = KMP_HW_CORE_TYPE_UNKNOWN;
615629
}
616630
};
617631

@@ -637,6 +651,11 @@ class kmp_topology_t {
637651
// Storage containing the absolute number of each topology layer
638652
int *count;
639653

654+
// Storage containing the core types and the number of
655+
// each core type for hybrid processors
656+
kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];
657+
int core_types_count[KMP_HW_MAX_NUM_CORE_TYPES];
658+
640659
// The hardware threads array
641660
// hw_threads is num_hw_threads long
642661
// Each hw_thread's ids and sub_ids are depth deep
@@ -675,6 +694,20 @@ class kmp_topology_t {
675694
// Set the last level cache equivalent type
676695
void _set_last_level_cache();
677696

697+
// Increments the number of cores of type 'type'
698+
void _increment_core_type(kmp_hw_core_type_t type) {
699+
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) {
700+
if (core_types[i] == KMP_HW_CORE_TYPE_UNKNOWN) {
701+
core_types[i] = type;
702+
core_types_count[i] = 1;
703+
break;
704+
} else if (core_types[i] == type) {
705+
core_types_count[i]++;
706+
break;
707+
}
708+
}
709+
}
710+
678711
public:
679712
// Force use of allocate()/deallocate()
680713
kmp_topology_t() = delete;

openmp/runtime/src/kmp_utility.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,19 @@ void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
248248
}
249249
#endif
250250
p->flags.rtm = 0;
251+
p->flags.hybrid = 0;
251252
if (max_arg > 7) {
252253
/* RTM bit CPUID.07:EBX, bit 11 */
254+
/* HYBRID bit CPUID.07:EDX, bit 15 */
253255
__kmp_x86_cpuid(7, 0, &buf);
254256
p->flags.rtm = (buf.ebx >> 11) & 1;
257+
p->flags.hybrid = (buf.edx >> 15) & 1;
255258
if (p->flags.rtm) {
256259
KA_TRACE(trace_level, (" RTM"));
257260
}
261+
if (p->flags.hybrid) {
262+
KA_TRACE(trace_level, (" HYBRID"));
263+
}
258264
}
259265
}
260266

0 commit comments

Comments
 (0)